linkedin-apply-assistant 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.yml +72 -0
  2. package/.github/ISSUE_TEMPLATE/config.yml +5 -0
  3. package/.github/ISSUE_TEMPLATE/config_help.yml +49 -0
  4. package/.github/ISSUE_TEMPLATE/docs.yml +40 -0
  5. package/.github/ISSUE_TEMPLATE/feature_request.yml +45 -0
  6. package/.github/ISSUE_TEMPLATE/safety_compliance.yml +48 -0
  7. package/.github/PULL_REQUEST_TEMPLATE.md +43 -0
  8. package/CHANGELOG.md +47 -0
  9. package/CODE_OF_CONDUCT.md +47 -0
  10. package/CONTRIBUTING.md +64 -0
  11. package/GOVERNANCE.md +41 -0
  12. package/LEGAL.md +38 -0
  13. package/LICENSE +22 -0
  14. package/MIGRATION.md +50 -0
  15. package/README.md +167 -0
  16. package/RELEASE_CHECKLIST.md +454 -0
  17. package/SAFETY.md +33 -0
  18. package/SECURITY.md +37 -0
  19. package/SUPPORT.md +44 -0
  20. package/THIRD_PARTY_NOTICES.md +67 -0
  21. package/bin/linkedin-apply-assistant.mjs +95 -0
  22. package/configs/config.example.yml +24 -0
  23. package/configs/qa_bank.example.yml +35 -0
  24. package/docs/apply.md +40 -0
  25. package/docs/assist.md +35 -0
  26. package/docs/browser-session.md +45 -0
  27. package/docs/ci-and-release-policy.md +105 -0
  28. package/docs/commands.md +176 -0
  29. package/docs/install-and-configuration.md +265 -0
  30. package/docs/registry-publication-strategy.md +169 -0
  31. package/docs/reports.md +35 -0
  32. package/docs/search.md +39 -0
  33. package/docs/troubleshooting.md +57 -0
  34. package/examples/dry_run_input.example.json +25 -0
  35. package/examples/reports/apply-audit.example.json +31 -0
  36. package/examples/reports/search-report.example.json +40 -0
  37. package/install.ps1 +178 -0
  38. package/package.json +59 -0
  39. package/pyproject.toml +51 -0
  40. package/src/linkedin_apply_assistant/__init__.py +8 -0
  41. package/src/linkedin_apply_assistant/apply_reports.py +229 -0
  42. package/src/linkedin_apply_assistant/ats_handlers.py +217 -0
  43. package/src/linkedin_apply_assistant/browser_sessions.py +155 -0
  44. package/src/linkedin_apply_assistant/cli.py +570 -0
  45. package/src/linkedin_apply_assistant/config.py +109 -0
  46. package/src/linkedin_apply_assistant/contracts.py +255 -0
  47. package/src/linkedin_apply_assistant/form_engine.py +180 -0
  48. package/src/linkedin_apply_assistant/linkedin_layer.py +436 -0
  49. package/src/linkedin_apply_assistant/page_actions.py +110 -0
  50. package/src/linkedin_apply_assistant/page_selectors.py +88 -0
  51. package/src/linkedin_apply_assistant/paths.py +135 -0
  52. package/src/linkedin_apply_assistant/qa_bank.py +352 -0
  53. package/src/linkedin_apply_assistant/redaction.py +119 -0
  54. package/src/linkedin_apply_assistant/safety.py +230 -0
  55. package/src/linkedin_apply_assistant/workflows.py +435 -0
@@ -0,0 +1,135 @@
1
+ """Runtime path resolution for the standalone assistant."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ import os
7
+ from pathlib import Path
8
+
9
try:
    from platformdirs import user_cache_dir, user_config_dir, user_data_dir
except ModuleNotFoundError:
    # platformdirs is optional. The stand-ins below read the Windows-style
    # environment variables first and otherwise use dot-directories under
    # the user's home directory.

    def _windows_base(env_name: str, fallback: Path) -> Path:
        """Return the directory named by ``env_name``, or ``fallback`` if unset/empty."""
        configured = os.environ.get(env_name)
        if not configured:
            return fallback
        return Path(configured).expanduser()

    def user_config_dir(appname: str) -> str:
        """Return the per-user configuration directory for ``appname``."""
        home_default = Path.home() / ".config"
        return str(_windows_base("APPDATA", home_default) / appname)

    def user_data_dir(appname: str) -> str:
        """Return the per-user data directory for ``appname``."""
        home_default = Path.home() / ".local" / "share"
        return str(_windows_base("LOCALAPPDATA", home_default) / appname)

    def user_cache_dir(appname: str) -> str:
        """Return the per-user cache directory for ``appname``."""
        home_default = Path.home() / ".cache"
        return str(_windows_base("LOCALAPPDATA", home_default) / appname / "cache")
28
+
29
+
30
+ APP_NAME = "linkedin-apply-assistant"
31
+
32
+
33
@dataclass(frozen=True)
class RuntimePaths:
    """Resolved paths for config, data, cache, browser profile, and outputs.

    Instances are immutable (``frozen=True``); the dataclass only stores
    paths and performs no filesystem access itself.
    """

    # Workspace root, or None when no workspace was supplied.
    workspace: Path | None
    # Directory that holds the default config and Q&A bank files.
    config_dir: Path
    # Directory for persistent application data.
    data_dir: Path
    # Directory for cache data.
    cache_dir: Path
    # Configuration file to use.
    config_file: Path
    # Q&A bank file to use.
    qa_bank_file: Path
    # Browser profile directory.
    browser_profile_dir: Path
    # Root directory for generated output.
    output_dir: Path
    # Directory for reports.
    reports_dir: Path
46
+
47
+
48
+ def _optional_path(value: str | Path | None) -> Path | None:
49
+ if value is None:
50
+ return None
51
+ return Path(value).expanduser()
52
+
53
+
54
+ def _resolve_under_workspace(workspace: Path, path: Path) -> Path:
55
+ if path.is_absolute():
56
+ return path
57
+ return workspace / path
58
+
59
+
60
def resolve_runtime_paths(
    workspace: str | Path | None = None,
    config: str | Path | None = None,
    qa_bank: str | Path | None = None,
    browser_profile: str | Path | None = None,
    output_dir: str | Path | None = None,
) -> RuntimePaths:
    """Compute every runtime location without creating files or directories.

    With a ``workspace`` all defaults live under it (``configs/``, ``data/``,
    ``.cache/``, ``browser-profile/``, ``output/``) and relative overrides
    are anchored at the workspace. Without one, defaults come from the
    per-user config/data/cache directories and overrides are used as given
    (after ``~`` expansion).

    Note: the workspace is passed through ``Path.resolve()``, which makes it
    absolute and may consult the filesystem; nothing is created.

    Returns:
        A ``RuntimePaths`` whose ``reports_dir`` is ``<output_dir>/reports``.
    """

    root = _optional_path(workspace)
    if root is None:
        config_home = Path(user_config_dir(APP_NAME))
        data_home = Path(user_data_dir(APP_NAME))
        cache_home = Path(user_cache_dir(APP_NAME))
        profile_fallback = data_home / "browser-profile"
        output_fallback = data_home / "output"
    else:
        root = root.resolve()
        config_home = root / "configs"
        data_home = root / "data"
        cache_home = root / ".cache"
        profile_fallback = root / "browser-profile"
        output_fallback = root / "output"

    def _pick(override: str | Path | None, fallback: Path) -> Path:
        """Return the explicit override when given, otherwise ``fallback``."""
        chosen = _optional_path(override)
        if chosen is None:
            return fallback
        if root is not None:
            return _resolve_under_workspace(root, chosen)
        return chosen

    final_output = _pick(output_dir, output_fallback)

    return RuntimePaths(
        workspace=root,
        config_dir=config_home,
        data_dir=data_home,
        cache_dir=cache_home,
        config_file=_pick(config, config_home / "config.yml"),
        qa_bank_file=_pick(qa_bank, config_home / "qa_bank.yml"),
        browser_profile_dir=_pick(browser_profile, profile_fallback),
        output_dir=final_output,
        reports_dir=final_output / "reports",
    )
116
+
117
+
118
def ensure_runtime_dirs(
    paths: RuntimePaths,
    *,
    include_browser_profile: bool = False,
) -> RuntimePaths:
    """Create the config, data, cache, output, and reports directories.

    Missing parents are created and existing directories are left alone.
    The browser profile directory is only created when
    ``include_browser_profile`` is true. Returns ``paths`` unchanged so the
    call can be chained.
    """

    targets = [
        paths.config_dir,
        paths.data_dir,
        paths.cache_dir,
        paths.output_dir,
        paths.reports_dir,
    ]
    if include_browser_profile:
        targets.append(paths.browser_profile_dir)
    for target in targets:
        target.mkdir(parents=True, exist_ok=True)
    return paths
@@ -0,0 +1,352 @@
1
+ """Standalone Q&A matching with explicit data paths."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+ from difflib import SequenceMatcher
7
+ from pathlib import Path
8
+ import re
9
+ from typing import Any
10
+ import unicodedata
11
+
12
+ import yaml
13
+
14
+ from .paths import RuntimePaths
15
+ from .safety import domain_from_url, normalize_url_for_audit
16
+
17
+
18
# Minimum similarity score a bank entry needs before it is returned as a match.
MATCH_THRESHOLD = 0.75

# Words dropped by ``tokenize`` before token-overlap scoring.
# NOTE(review): "what's" can never match a token, because ``normalize``
# replaces the apostrophe with a space before tokens are compared; the entry
# is harmless but has no effect.
STOPWORDS = {
    "a",
    "an",
    "the",
    "is",
    "are",
    "was",
    "were",
    "of",
    "in",
    "on",
    "at",
    "to",
    "for",
    "with",
    "by",
    "from",
    "as",
    "and",
    "or",
    "but",
    "if",
    "your",
    "you",
    "our",
    "this",
    "that",
    "do",
    "does",
    "did",
    "what",
    "what's",
    "how",
    "have",
    "has",
    "be",
    "been",
    "being",
    "will",
    "would",
}
61
+
62
+
63
def normalize(text: str) -> str:
    """Return ``text`` lowercased with accents and punctuation removed.

    Combining marks are dropped after NFKD decomposition, every character
    that is neither a word character nor whitespace becomes a space, and runs
    of whitespace collapse to a single space. Falsy input yields ``""``.
    """

    if not text:
        return ""
    decomposed = unicodedata.normalize("NFKD", text)
    without_marks = "".join(
        filter(lambda ch: not unicodedata.combining(ch), decomposed)
    )
    spaced = re.sub(r"[^\w\s]", " ", without_marks.lower())
    return re.sub(r"\s+", " ", spaced).strip()
74
+
75
+
76
def tokenize(text: str) -> set[str]:
    """Return the set of normalized words longer than one character that are not stopwords."""

    words = normalize(text).split()
    return {word for word in words if len(word) > 1 and word not in STOPWORDS}
80
+
81
+
82
def similarity(question: str, pattern: str) -> float:
    """Score how well ``pattern`` matches ``question`` on a 0.0-1.0 scale.

    A normalized pattern of at least four characters that occurs inside the
    normalized question scores 0.95-1.0, rising with how much of the question
    it covers. Otherwise the score blends the share of pattern tokens found
    in the question (weight 0.85) with the character sequence ratio (weight
    0.15), and never falls below the sequence ratio alone. A pattern with no
    content tokens scores 0.0.
    """

    question_norm = normalize(question)
    pattern_norm = normalize(pattern)

    # NOTE(review): containment is a raw substring test, so a pattern such as
    # "male" also hits a question containing "female".
    if len(pattern_norm) >= 4 and pattern_norm in question_norm:
        coverage = len(pattern_norm) / max(len(question_norm), 1)
        return 0.95 + coverage * 0.05

    pattern_tokens = tokenize(pattern)
    if not pattern_tokens:
        return 0.0
    question_tokens = tokenize(question)

    token_score = len(question_tokens & pattern_tokens) / len(pattern_tokens)
    seq_score = SequenceMatcher(None, question_norm, pattern_norm).ratio()
    blended = token_score * 0.85 + seq_score * 0.15
    return max(blended, seq_score)
97
+
98
+
99
+ class QABank:
100
+ """Application Q&A knowledge bank backed by explicit package paths."""
101
+
102
+ def __init__(
103
+ self,
104
+ bank_file: str | Path | None = None,
105
+ pending_file: str | Path | None = None,
106
+ profile: dict[str, Any] | None = None,
107
+ ) -> None:
108
+ self.bank_file = Path(bank_file).expanduser() if bank_file is not None else None
109
+ self.pending_file = Path(pending_file).expanduser() if pending_file is not None else None
110
+ self.profile = profile or {}
111
+ self.data = self._load()
112
+ self.session_unknowns: list[dict[str, Any]] = []
113
+
114
+ @classmethod
115
+ def from_runtime_paths(
116
+ cls,
117
+ paths: RuntimePaths,
118
+ profile: dict[str, Any] | None = None,
119
+ ) -> "QABank":
120
+ """Create a bank using standalone runtime locations."""
121
+
122
+ return cls(
123
+ bank_file=paths.qa_bank_file,
124
+ pending_file=paths.data_dir / "pending_questions.md",
125
+ profile=profile,
126
+ )
127
+
128
+ def _load(self) -> dict[str, Any]:
129
+ if self.bank_file is None or not self.bank_file.exists():
130
+ return {"qa_pairs": []}
131
+ parsed = yaml.safe_load(self.bank_file.read_text(encoding="utf-8"))
132
+ if parsed is None:
133
+ return {"qa_pairs": []}
134
+ if not isinstance(parsed, dict):
135
+ raise ValueError("Q&A bank root must be a mapping")
136
+ pairs = parsed.get("qa_pairs")
137
+ if pairs is None:
138
+ parsed["qa_pairs"] = []
139
+ elif not isinstance(pairs, list):
140
+ raise ValueError("qa_pairs must be a list")
141
+ return dict(parsed)
142
+
143
+ def _patterns_for(self, qa: dict[str, Any]) -> list[str]:
144
+ patterns = qa.get("patterns", qa.get("question_patterns", []))
145
+ if isinstance(patterns, str):
146
+ return [patterns]
147
+ if isinstance(patterns, list):
148
+ return [str(pattern) for pattern in patterns if str(pattern).strip()]
149
+ return []
150
+
151
+ def _field_type_for(self, qa: dict[str, Any]) -> str:
152
+ return str(qa.get("field_type") or qa.get("response_type") or "text")
153
+
154
+ def _substitute_placeholders(
155
+ self,
156
+ text: str,
157
+ context: dict[str, Any] | None = None,
158
+ ) -> str:
159
+ if not text or "{" not in text:
160
+ return text
161
+ ctx = context or {}
162
+ replacements = {
163
+ "{company}": ctx.get("company", ""),
164
+ "{role}": ctx.get("role", ""),
165
+ "{portfolio}": self.profile.get("portfolio", ""),
166
+ "{linkedin}": self.profile.get("linkedin", ""),
167
+ "{github}": self.profile.get("github", ""),
168
+ "{email}": self.profile.get("email", ""),
169
+ "{phone}": self.profile.get("phone", ""),
170
+ "{full_name}": self.profile.get("full_name", ""),
171
+ "{first_name}": self.profile.get("first_name", ""),
172
+ "{last_name}": self.profile.get("last_name", ""),
173
+ }
174
+ for placeholder, value in replacements.items():
175
+ text = text.replace(placeholder, str(value))
176
+ return text
177
+
178
+ def find_answer(
179
+ self,
180
+ question_text: str,
181
+ field_type: str | None = None,
182
+ context: dict[str, Any] | None = None,
183
+ ) -> dict[str, Any] | None:
184
+ """Return a matched answer record, or ``None`` when no threshold match exists."""
185
+
186
+ if not question_text:
187
+ return None
188
+
189
+ def type_compatible(bank_type: str, requested: str | None) -> bool:
190
+ if not requested or not bank_type:
191
+ return True
192
+ bank_type = bank_type.lower()
193
+ requested = requested.lower()
194
+ if bank_type == requested:
195
+ return True
196
+ if bank_type == "radio_or_select" and requested in {"radio", "select"}:
197
+ return True
198
+ textish = {"text", "textarea", "email", "tel", "url", "number"}
199
+ return bank_type in textish and requested in textish
200
+
201
+ best: dict[str, Any] | None = None
202
+ best_score = 0.0
203
+ best_compatible: dict[str, Any] | None = None
204
+ best_compatible_score = 0.0
205
+
206
+ for qa in self.data.get("qa_pairs", []):
207
+ if not isinstance(qa, dict):
208
+ continue
209
+ bank_type = self._field_type_for(qa)
210
+ compatible = type_compatible(bank_type, field_type)
211
+ for pattern in self._patterns_for(qa):
212
+ score = similarity(question_text, pattern)
213
+ if field_type and compatible:
214
+ score = min(score * 1.05, 1.0)
215
+ candidate = {
216
+ "id": qa.get("id", "?"),
217
+ "answer": self._substitute_placeholders(str(qa.get("answer", "")), context),
218
+ "field_type": bank_type,
219
+ "matched_pattern": pattern,
220
+ "score": round(score, 3),
221
+ }
222
+ pattern_specificity = len(normalize(pattern))
223
+ best_specificity = len(normalize(best["matched_pattern"])) if best else -1
224
+ compatible_specificity = (
225
+ len(normalize(best_compatible["matched_pattern"])) if best_compatible else -1
226
+ )
227
+ if score > best_score or (
228
+ score == best_score and pattern_specificity > best_specificity
229
+ ):
230
+ best_score = score
231
+ best = candidate
232
+ if compatible and (
233
+ score > best_compatible_score
234
+ or (
235
+ score == best_compatible_score
236
+ and pattern_specificity > compatible_specificity
237
+ )
238
+ ):
239
+ best_compatible_score = score
240
+ best_compatible = candidate
241
+
242
+ if field_type:
243
+ if best_compatible and best_compatible_score >= MATCH_THRESHOLD:
244
+ return best_compatible
245
+ return None
246
+ if best and best_score >= MATCH_THRESHOLD:
247
+ return best
248
+ return None
249
+
250
+ def log_pending(
251
+ self,
252
+ question_text: str,
253
+ context: dict[str, Any] | None = None,
254
+ field_type: str | None = None,
255
+ is_required: bool = False,
256
+ ) -> dict[str, Any]:
257
+ """Record an unknown question in memory and append when a pending path exists."""
258
+
259
+ ctx = context or {}
260
+ normalized_url = normalize_url_for_audit(ctx.get("apply_url") or ctx.get("url") or "")
261
+ entry = {
262
+ "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
263
+ "question": question_text.strip(),
264
+ "company": ctx.get("company", "unknown"),
265
+ "role": ctx.get("role", "unknown"),
266
+ "ats": ctx.get("ats", "unknown"),
267
+ "domain": ctx.get("domain") or domain_from_url(normalized_url),
268
+ "field_type": field_type or "text",
269
+ "required": is_required,
270
+ }
271
+ self.session_unknowns.append(entry)
272
+ if self.pending_file is not None:
273
+ self._append_pending(entry)
274
+ return entry
275
+
276
+ def _append_pending(self, entry: dict[str, Any]) -> None:
277
+ if self.pending_file is None:
278
+ return
279
+ self.pending_file.parent.mkdir(parents=True, exist_ok=True)
280
+ existing_questions: set[str] = set()
281
+ if self.pending_file.exists():
282
+ content = self.pending_file.read_text(encoding="utf-8")
283
+ for line in content.splitlines():
284
+ match = re.match(r"^###\s+Q:\s*(.+?)\s*$", line)
285
+ if match:
286
+ existing_questions.add(normalize(match.group(1)))
287
+
288
+ if normalize(entry["question"]) in existing_questions:
289
+ self._increment_pending_counter(entry["question"], entry)
290
+ return
291
+
292
+ header_needed = not self.pending_file.exists() or self.pending_file.stat().st_size == 0
293
+ with self.pending_file.open("a", encoding="utf-8") as handle:
294
+ if header_needed:
295
+ handle.write(self._pending_header())
296
+ handle.write(self._format_pending_entry(entry))
297
+
298
+ def _pending_header(self) -> str:
299
+ return """# Pending Application Questions
300
+
301
+ These are questions the assistant encountered that are not in your selected Q&A bank yet.
302
+ Add a truthful answer below each question, then copy the final entry into your own Q&A bank.
303
+
304
+ Format for adding an answer:
305
+ ```
306
+ **Answer:** Your answer here
307
+ **Field type:** text | textarea | number | select | radio_or_select
308
+ **Patterns:** synonym1, synonym2
309
+ ```
310
+
311
+ ---
312
+
313
+ """
314
+
315
+ def _format_pending_entry(self, entry: dict[str, Any]) -> str:
316
+ seen_marker = f"[seen 1 time as of {entry['timestamp']}]"
317
+ return f"""### Q: {entry["question"]}
318
+
319
+ - **First seen:** {entry["timestamp"]}
320
+ - **First context:** {entry["company"]} - {entry["role"]} ({entry["ats"]})
321
+ - **Domain:** {entry["domain"] or "unknown"}
322
+ - **Field type:** {entry["field_type"]}
323
+ - **Required:** {entry["required"]}
324
+ - **Stats:** {seen_marker}
325
+
326
+ **Answer:** _(fill in here)_
327
+
328
+ **Patterns:** _(optional - add synonyms separated by commas)_
329
+
330
+ ---
331
+
332
+ """
333
+
334
+ def _increment_pending_counter(self, question: str, entry: dict[str, Any]) -> None:
335
+ if self.pending_file is None or not self.pending_file.exists():
336
+ return
337
+ lines = self.pending_file.read_text(encoding="utf-8").splitlines(keepends=True)
338
+ norm_target = normalize(question)
339
+ for i, line in enumerate(lines):
340
+ match = re.match(r"^###\s+Q:\s*(.+?)\s*$", line)
341
+ if not match or normalize(match.group(1)) != norm_target:
342
+ continue
343
+ for j in range(i, min(i + 20, len(lines))):
344
+ stat_match = re.match(
345
+ r"^- \*\*Stats:\*\* \[seen (\d+) times? as of [^\]]+\]\s*$",
346
+ lines[j],
347
+ )
348
+ if stat_match:
349
+ new_count = int(stat_match.group(1)) + 1
350
+ lines[j] = f"- **Stats:** [seen {new_count} times as of {entry['timestamp']}]\n"
351
+ self.pending_file.write_text("".join(lines), encoding="utf-8")
352
+ return
@@ -0,0 +1,119 @@
1
+ """Central report redaction for local JSON and Markdown artifacts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping, Sequence
6
+ import re
7
+ from typing import Any
8
+
9
+ from .form_engine import normalize_space
10
+ from .safety import normalize_url_for_audit
11
+
12
+
13
# Replacement text written in place of any redacted value.
REDACTION_MARKER = "[REDACTED]"
# Maximum length of a rendered Markdown value, including the "..." suffix.
MARKDOWN_VALUE_LIMIT = 180

# A key is sensitive when its normalized form CONTAINS any of these fragments.
# NOTE(review): keys are normalized with "-" replaced by "_" before the
# comparison, so "browser-profile" can never match ("browser_profile" covers
# it). Substring matching also means "auth" hits keys such as "author".
_SENSITIVE_KEY_PARTS = (
    "password",
    "secret",
    "token",
    "cookie",
    "credential",
    "auth",
    "session",
    "browser_profile",
    "browser-profile",
    "raw_html",
    "html",
    "screenshot",
    "resume_contents",
    "cover_letter_contents",
    "document_contents",
    "phone_answer",
    "email_answer",
    "answer_phone",
    "answer_email",
    "application_history",
    "candidate",
    "profile",
    "documents",
    "raw_form",
    "raw_state",
)

# Keys that are sensitive only when the normalized key equals them exactly.
_SENSITIVE_EXACT_KEYS = {"answer", "email", "phone", "tel"}

# Keys whose string values are normalized as URLs instead of pattern-checked.
_URL_KEYS = {"url", "apply_url", "search_url"}

# A string value is redacted when any of these patterns is found in it:
# bearer tokens, session ids, cookie headers, HTML documents, e-mail
# addresses, and phone-like digit runs.
# NOTE(review): the last pattern also matches date-like strings such as
# "2024-01-15", so timestamps under non-sensitive keys are redacted too -
# confirm this is intended.
_SENSITIVE_VALUE_PATTERNS = (
    re.compile(r"\bBearer\s+[A-Za-z0-9._~+/=-]+", re.IGNORECASE),
    re.compile(r"\bsessionid\s*=", re.IGNORECASE),
    re.compile(r"\bcookie\s*[:=]", re.IGNORECASE),
    re.compile(r"<\s*html\b", re.IGNORECASE),
    re.compile(r"[\w.+-]+@[\w.-]+\.[A-Za-z]{2,}"),
    re.compile(r"\+?\d[\d\s().-]{7,}\d"),
)
56
+
57
+
58
+ def _normalized_key(key: Any) -> str:
59
+ return re.sub(r"[\s-]+", "_", str(key or "").strip().lower())
60
+
61
+
62
def _is_sensitive_key(key: Any) -> bool:
    """Return whether values stored under ``key`` must always be redacted.

    True when the normalized key equals an exact sensitive key or contains
    any sensitive fragment.
    """
    candidate = _normalized_key(key)
    if candidate in _SENSITIVE_EXACT_KEYS:
        return True
    for fragment in _SENSITIVE_KEY_PARTS:
        if fragment in candidate:
            return True
    return False
67
+
68
+
69
def _is_url_key(key: Any) -> bool:
    """Return whether ``key`` names a URL field (a known URL key or any ``*_url`` key)."""
    name = _normalized_key(key)
    if name.endswith("_url"):
        return True
    return name in _URL_KEYS
72
+
73
+
74
def _is_sensitive_value(value: str) -> bool:
    """Return whether ``value`` contains text matching any sensitive-value pattern."""
    for pattern in _SENSITIVE_VALUE_PATTERNS:
        if pattern.search(value):
            return True
    return False
76
+
77
+
78
def sanitize_report_payload(payload: Any) -> Any:
    """Build a redacted copy of ``payload``; the input is left untouched.

    The top level is sanitized with an empty key, so only nested keys and
    the values themselves decide what gets redacted.
    """

    sanitized_copy = _sanitize_value(payload, key="")
    return sanitized_copy
82
+
83
+
84
def _sanitize_value(value: Any, *, key: Any) -> Any:
    """Recursively sanitize ``value`` as found under ``key``.

    - A sensitive key redacts the whole value, whatever its type.
    - Mappings become dicts with string keys; each item is sanitized under
      its own key.
    - Tuples and other non-string sequences become lists whose items inherit
      the parent ``key``.
    - Strings under URL keys are normalized for audit; other strings are
      redacted when they match a sensitive-value pattern.
    - Anything else (numbers, ``None``, bytes, ...) is returned unchanged.
    """
    if _is_sensitive_key(key):
        return REDACTION_MARKER

    if isinstance(value, Mapping):
        cleaned: dict[str, Any] = {}
        for child_key, child_value in value.items():
            cleaned[str(child_key)] = _sanitize_value(child_value, key=child_key)
        return cleaned

    if isinstance(value, str):
        if _is_url_key(key):
            return normalize_url_for_audit(value)
        return REDACTION_MARKER if _is_sensitive_value(value) else value

    # bytes-like values are sequences too, but are passed through as-is.
    if isinstance(value, (bytes, bytearray)):
        return value

    if isinstance(value, Sequence):
        return [_sanitize_value(element, key=key) for element in value]

    return value
103
+
104
+
105
def sanitize_markdown_value(value: Any) -> str:
    """Return compact Markdown-safe text for a sanitized report field.

    The value is sanitized, converted with ``str()``, passed through
    ``normalize_space``, and has ``|`` escaped as ``\\|``. Results longer
    than ``MARKDOWN_VALUE_LIMIT`` are cut and suffixed with ``...`` so the
    total length equals the limit.
    """

    sanitized = sanitize_report_payload(value)
    # The previous container branch compared a dict/list against the marker
    # string, which can never be equal; every case reduces to str().
    rendered = normalize_space(str(sanitized)).replace("|", r"\|")
    if len(rendered) > MARKDOWN_VALUE_LIMIT:
        return f"{rendered[: MARKDOWN_VALUE_LIMIT - 3]}..."
    return rendered
117
+
118
+
119
+ __all__ = ["REDACTION_MARKER", "sanitize_markdown_value", "sanitize_report_payload"]