linkedin-apply-assistant 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.yml +72 -0
  2. package/.github/ISSUE_TEMPLATE/config.yml +5 -0
  3. package/.github/ISSUE_TEMPLATE/config_help.yml +49 -0
  4. package/.github/ISSUE_TEMPLATE/docs.yml +40 -0
  5. package/.github/ISSUE_TEMPLATE/feature_request.yml +45 -0
  6. package/.github/ISSUE_TEMPLATE/safety_compliance.yml +48 -0
  7. package/.github/PULL_REQUEST_TEMPLATE.md +43 -0
  8. package/CHANGELOG.md +47 -0
  9. package/CODE_OF_CONDUCT.md +47 -0
  10. package/CONTRIBUTING.md +64 -0
  11. package/GOVERNANCE.md +41 -0
  12. package/LEGAL.md +38 -0
  13. package/LICENSE +22 -0
  14. package/MIGRATION.md +50 -0
  15. package/README.md +167 -0
  16. package/RELEASE_CHECKLIST.md +454 -0
  17. package/SAFETY.md +33 -0
  18. package/SECURITY.md +37 -0
  19. package/SUPPORT.md +44 -0
  20. package/THIRD_PARTY_NOTICES.md +67 -0
  21. package/bin/linkedin-apply-assistant.mjs +95 -0
  22. package/configs/config.example.yml +24 -0
  23. package/configs/qa_bank.example.yml +35 -0
  24. package/docs/apply.md +40 -0
  25. package/docs/assist.md +35 -0
  26. package/docs/browser-session.md +45 -0
  27. package/docs/ci-and-release-policy.md +105 -0
  28. package/docs/commands.md +176 -0
  29. package/docs/install-and-configuration.md +265 -0
  30. package/docs/registry-publication-strategy.md +169 -0
  31. package/docs/reports.md +35 -0
  32. package/docs/search.md +39 -0
  33. package/docs/troubleshooting.md +57 -0
  34. package/examples/dry_run_input.example.json +25 -0
  35. package/examples/reports/apply-audit.example.json +31 -0
  36. package/examples/reports/search-report.example.json +40 -0
  37. package/install.ps1 +178 -0
  38. package/package.json +59 -0
  39. package/pyproject.toml +51 -0
  40. package/src/linkedin_apply_assistant/__init__.py +8 -0
  41. package/src/linkedin_apply_assistant/apply_reports.py +229 -0
  42. package/src/linkedin_apply_assistant/ats_handlers.py +217 -0
  43. package/src/linkedin_apply_assistant/browser_sessions.py +155 -0
  44. package/src/linkedin_apply_assistant/cli.py +570 -0
  45. package/src/linkedin_apply_assistant/config.py +109 -0
  46. package/src/linkedin_apply_assistant/contracts.py +255 -0
  47. package/src/linkedin_apply_assistant/form_engine.py +180 -0
  48. package/src/linkedin_apply_assistant/linkedin_layer.py +436 -0
  49. package/src/linkedin_apply_assistant/page_actions.py +110 -0
  50. package/src/linkedin_apply_assistant/page_selectors.py +88 -0
  51. package/src/linkedin_apply_assistant/paths.py +135 -0
  52. package/src/linkedin_apply_assistant/qa_bank.py +352 -0
  53. package/src/linkedin_apply_assistant/redaction.py +119 -0
  54. package/src/linkedin_apply_assistant/safety.py +230 -0
  55. package/src/linkedin_apply_assistant/workflows.py +435 -0
@@ -0,0 +1,255 @@
1
+ """Stable workflow contracts for the standalone assistant."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Any, Mapping, Protocol, Sequence
8
+
9
+ from .form_engine import DetectionResult, FillResult
10
+
11
+
12
@dataclass(frozen=True)
class JobRecord:
    """Plain-value LinkedIn job context used by workflow reports.

    Instances are immutable and compare by value across every field.
    They are also hashable, so records can be de-duplicated with sets or
    used as dictionary keys.
    """

    job_id: str = ""
    title: str = ""
    company: str = ""
    url: str = ""
    location: str = ""
    source: str = "linkedin"
    search_url: str = ""
    ats: str = ""
    # ``raw`` still takes part in equality, but it is left out of the
    # generated ``__hash__``: a dict is unhashable, so including it made
    # every ``hash(JobRecord(...))`` call raise ``TypeError``.
    raw: dict[str, Any] = field(default_factory=dict, hash=False)
25
+
26
+
27
@dataclass(frozen=True)
class SearchRequest:
    """Immutable bundle of inputs for one search-only workflow run."""

    # Defaults to 10 when the caller does not specify a limit.
    limit: int = 10

    # Optional search parameters; ``None`` means "not supplied".
    search_url: str | None = None
    query: str | None = None
    location: str | None = None

    # Each request gets its own empty profile mapping by default.
    profile: dict[str, Any] = field(default_factory=dict)

    # Untyped in this module -- NOTE(review): confirm the expected object.
    paths: Any = None
37
+
38
+
39
@dataclass(frozen=True)
class SurfaceIdentity:
    """Immutable identity of a visible application surface."""

    url: str = ""
    title: str = ""
    surface: str = ""
    ats: str = ""
    job_id: str = ""

    def key(self) -> str:
        """Build the compact deduplication key for fill-once sessions.

        The key is the five identity fields, in declaration order, joined
        with ``|``.
        """

        parts = [self.url, self.title, self.surface, self.ats, self.job_id]
        return "|".join(parts)
53
+
54
+
55
@dataclass(frozen=True)
class ReportArtifact:
    """A single report file produced by a workflow.

    Both fields are required; there are no defaults.
    """

    kind: str
    path: Path
61
+
62
+
63
@dataclass
class SearchResult:
    """Mutable output of a search workflow together with its report metadata."""

    command: str = "search"
    timestamp: str = ""
    search_url: str = ""

    # Every container defaults to a fresh, empty instance per result.
    jobs: list[JobRecord] = field(default_factory=list)
    events: list[dict[str, Any]] = field(default_factory=list)
    summary: dict[str, Any] = field(default_factory=dict)
    reports: list[ReportArtifact] = field(default_factory=list)
74
+
75
+
76
@dataclass(frozen=True)
class AssistEvent:
    """Immutable record of one assistive fill workflow event.

    Only ``type`` is required; every other field has a default.
    """

    type: str
    surface: str = ""
    ats: str = ""
    status: str = ""

    # Counters and flags reported for the event.
    filled_count: int = 0
    required_empty_count: int = 0
    unknown_count: int = 0
    reached_submit_step: bool = False
    blocked_reason: str = ""

    # Context attached to the event; containers default to fresh empties.
    job: dict[str, Any] = field(default_factory=dict)
    identity: SurfaceIdentity | None = None
    required_empty: list[Any] = field(default_factory=list)
    unknown_questions: list[Any] = field(default_factory=list)
    timestamp: str = ""
94
+
95
+
96
@dataclass(frozen=True)
class AssistRequest:
    """Immutable bundle of inputs for a visible-browser assistive fill session."""

    start_url: str | None = None
    mode: str = "auto"
    max_cycles: int = 1

    # Each request gets its own empty mappings by default.
    profile: dict[str, Any] = field(default_factory=dict)
    qa_context: dict[str, Any] = field(default_factory=dict)
    documents: dict[str, Any] = field(default_factory=dict)

    # Untyped in this module -- NOTE(review): confirm the expected object.
    paths: Any = None
107
+
108
+
109
@dataclass
class AssistResult:
    """Mutable output of an assist workflow together with its report metadata."""

    command: str = "assist"
    timestamp: str = ""

    # Every container defaults to a fresh, empty instance per result.
    events: list[AssistEvent] = field(default_factory=list)
    summary: dict[str, Any] = field(default_factory=dict)
    reports: list[ReportArtifact] = field(default_factory=list)
118
+
119
+
120
@dataclass(frozen=True)
class SubmissionResult:
    """Immutable submission policy decision.

    The defaults describe a denied decision: status ``"disabled"`` and
    ``allowed=False``.
    """

    status: str = "disabled"
    reason: str = "Browser submission is disabled in this package boundary."
    allowed: bool = False
127
+
128
+
129
@dataclass(frozen=True)
class SubmitDecision:
    """Immutable plain-value audit record for an explicit submit/apply boundary.

    Every field has a default; an empty record reads as not allowed with
    status ``"disabled"``.
    """

    # When and by which command the decision was recorded.
    timestamp: str = ""
    command: str = ""

    # The decision itself.
    policy: str = ""
    action: str = ""
    allowed: bool = False
    status: str = "disabled"
    reason: str = ""

    # Context of the application the decision refers to.
    company: str = ""
    role: str = ""
    url: str = ""
    domain: str = ""
    ats: str = ""
    confirmation_state: str = ""
146
+
147
+
148
class BrowserSession(Protocol):
    """Structural interface for a visible browser session."""

    @property
    def pages(self) -> Sequence[Any]:
        """Pages or tabs currently known to the session."""
        ...

    def open_url(self, url: str) -> None:
        """Open ``url`` in the visible session."""
        ...

    def close(self) -> None:
        """Close the session when the caller owns it."""
        ...
160
+
161
+
162
class BrowserSessionFactory(Protocol):
    """Structural interface for objects that create visible browser sessions."""

    def open(self, request: AssistRequest | SearchRequest) -> BrowserSession:
        """Open a session for the given workflow request."""
        ...
167
+
168
+
169
class LinkedInDiscovery(Protocol):
    """Structural interface for LinkedIn search result discovery."""

    def discover(self, request: SearchRequest) -> Sequence[JobRecord | Mapping[str, Any]]:
        """Return candidate job records for ``request``.

        Implementations may return ``JobRecord`` instances or plain mappings.
        """
        ...
174
+
175
+
176
class ApplySurfaceDetector(Protocol):
    """Structural interface for detecting the current apply surface."""

    def detect(self, session: BrowserSession) -> DetectionResult:
        """Detect the currently fillable application surface in ``session``."""
        ...
181
+
182
+
183
class FillAdapter(Protocol):
    """Structural interface for fill-only adapters."""

    def fill(
        self,
        detection: DetectionResult,
        profile: dict[str, Any],
        bank: Any = None,
        qa_context: dict[str, Any] | None = None,
        documents: dict[str, Any] | None = None,
    ) -> FillResult:
        """Fill the detected surface without performing submission.

        Only ``detection`` and ``profile`` are required; the remaining
        arguments default to ``None``.
        """
        ...
195
+
196
+
197
class QAMatcher(Protocol):
    """Structural interface for question-and-answer matching."""

    def find_answer(
        self,
        question_text: str,
        field_type: str | None = None,
        context: dict[str, Any] | None = None,
    ) -> dict[str, Any] | None:
        """Look up an answer for ``question_text``.

        The return annotation allows ``None``.
        """
        ...

    def log_pending(
        self,
        question_text: str,
        context: dict[str, Any] | None = None,
        field_type: str | None = None,
        is_required: bool = False,
    ) -> dict[str, Any]:
        """Record ``question_text`` as an unknown question."""
        ...
216
+
217
+
218
class SubmissionPolicy(Protocol):
    """Structural interface for submission policies."""

    def decide(
        self,
        action: str,
        context: dict[str, Any] | None = None,
    ) -> SubmissionResult:
        """Return the policy decision on whether ``action`` is allowed."""
        ...
227
+
228
+
229
class ReportSink(Protocol):
    """Structural interface for writing workflow reports."""

    def write(self, command: str, report: dict[str, Any]) -> Sequence[ReportArtifact]:
        """Write the report artifacts for workflow ``command``."""
        ...
234
+
235
+
236
# Public names of this module, kept in alphabetical order.
__all__ = [
    "ApplySurfaceDetector",
    "AssistEvent",
    "AssistRequest",
    "AssistResult",
    "BrowserSession",
    "BrowserSessionFactory",
    "FillAdapter",
    "JobRecord",
    "LinkedInDiscovery",
    "QAMatcher",
    "ReportArtifact",
    "ReportSink",
    "SearchRequest",
    "SearchResult",
    "SubmissionPolicy",
    "SubmissionResult",
    "SubmitDecision",
    "SurfaceIdentity",
]
@@ -0,0 +1,180 @@
1
+ """Import-safe form primitives for the standalone assistant."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime, timezone
7
+ import json
8
+ from pathlib import Path
9
+ import re
10
+ from typing import Any
11
+
12
+
13
# Bound at import time so ``safe_print`` always reaches the print callable
# that was in effect when this module was loaded.
_real_print = print


def safe_print(*args: object, **kwargs: object) -> None:
    """Print text while tolerating narrow console encodings.

    Accepts the same arguments as the built-in ``print``. The whole message
    is rendered to one string and written in a single call. ``print`` writes
    each argument separately, so a failure part-way through a line used to
    leave the earlier pieces in the stream and every retry repeated them.

    If the target stream cannot encode the text, unsupported characters are
    replaced with ``?`` using the stream's own encoding, so characters the
    stream can represent are kept. ASCII is the last-resort fallback.
    """

    import sys

    sep = kwargs.get("sep")
    end = kwargs.get("end")
    if any(value is not None and not isinstance(value, str) for value in (sep, end)):
        # Invalid ``sep``/``end``: let the built-in raise its usual TypeError.
        _real_print(*args, **kwargs)
        return

    rendered = []
    for arg in args:
        try:
            rendered.append(str(arg))
        except UnicodeEncodeError:
            rendered.append(repr(arg))
    text = (" " if sep is None else sep).join(rendered) + ("\n" if end is None else end)

    # ``sep`` and ``end`` are already baked into ``text``.
    passthrough = {key: value for key, value in kwargs.items() if key not in ("sep", "end")}

    try:
        _real_print(text, end="", **passthrough)
        return
    except UnicodeEncodeError:
        pass

    ascii_text = text.encode("ascii", errors="replace").decode("ascii")
    stream = passthrough.get("file")
    if stream is None:
        stream = sys.stdout
    encoding = getattr(stream, "encoding", None)
    try:
        sanitized = text.encode(encoding, errors="replace").decode(encoding, errors="replace")
    except (LookupError, TypeError):
        # Missing, unknown, or non-string encoding name.
        sanitized = ascii_text

    try:
        _real_print(sanitized, end="", **passthrough)
    except UnicodeEncodeError:
        # The advertised encoding was not the effective one; ASCII is safe.
        _real_print(ascii_text, end="", **passthrough)
35
+
36
+
37
@dataclass
class FillResult:
    """Mutable, submit-free fill result shared by package fill-only surfaces."""

    # Each list defaults to a fresh, empty instance per result.
    filled: list[Any] = field(default_factory=list)
    required_empty: list[Any] = field(default_factory=list)
    unknown_questions: list[Any] = field(default_factory=list)

    reached_submit_step: bool = False
    surface: str = ""
46
+
47
+
48
@dataclass
class DetectionResult:
    """Detection result for the currently visible application surface.

    The default ``surface`` value is the string ``"none"``.
    """

    surface: str = "none"
    page: Any = None
    ats: str = ""
    job_context: dict[str, Any] = field(default_factory=dict)
56
+
57
+
58
+ ATS_PATTERNS: dict[str, tuple[str, ...]] = {
59
+ "greenhouse": (
60
+ r"boards\.greenhouse\.io",
61
+ r"job-boards\.greenhouse\.io",
62
+ r"greenhouse\.io/embed",
63
+ ),
64
+ "lever": (
65
+ r"jobs\.lever\.co",
66
+ r"lever\.co/[a-z0-9-]+/",
67
+ ),
68
+ "ashby": (
69
+ r"jobs\.ashbyhq\.com",
70
+ r"ashbyhq\.com",
71
+ ),
72
+ "workday": (
73
+ r"\.myworkdayjobs\.com",
74
+ r"workday\.com",
75
+ ),
76
+ "smartrecruiters": (
77
+ r"smartrecruiters\.com",
78
+ r"jobs\.smartrecruiters\.com",
79
+ ),
80
+ "recruitee": (r"\.recruitee\.com",),
81
+ "workable": (
82
+ r"apply\.workable\.com",
83
+ r"jobs\.workable\.com",
84
+ ),
85
+ "bamboohr": (r"\.bamboohr\.com",),
86
+ "icims": (r"\.icims\.com",),
87
+ "taleo": (r"\.taleo\.net",),
88
+ "successfactors": (
89
+ r"\.successfactors\.com",
90
+ r"jobs\.sap\.com",
91
+ ),
92
+ "personio": (
93
+ r"\.personio\.de",
94
+ r"jobs\.personio\.com",
95
+ ),
96
+ "teamtailor": (r"\.teamtailor\.com",),
97
+ "jobvite": (
98
+ r"jobs\.jobvite\.com",
99
+ r"\.jobvite\.com",
100
+ ),
101
+ "resumator": (r"applytojob\.com",),
102
+ }
103
+
104
+
105
def detect_ats(url: str) -> str:
    """Return the known ATS identifier for a URL, or ``unknown``.

    Patterns are searched case-insensitively anywhere in ``url``. Entries
    are tried in ``ATS_PATTERNS`` order and the first match wins. A falsy
    ``url`` is treated as an empty string.
    """

    haystack = url or ""
    for name, regexes in ATS_PATTERNS.items():
        if any(re.search(regex, haystack, re.IGNORECASE) for regex in regexes):
            return name
    return "unknown"
113
+
114
+
115
def normalize_space(value: Any) -> str:
    """Collapse whitespace for stable text comparisons.

    Any falsy ``value`` (``None``, ``""``, ``0``, ...) yields ``""``.
    Otherwise ``str(value)`` has each whitespace run replaced by a single
    space and its ends trimmed.
    """

    text = str(value) if value else ""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
119
+
120
+
121
def load_jobs(input_file: Path) -> list[dict[str, Any]]:
    """Load a JSON job list or object with a ``jobs`` list from an explicit path.

    Args:
        input_file: Path to a UTF-8 JSON file, with or without a byte-order
            mark.

    Returns:
        A new list holding the job objects from the file.

    Raises:
        ValueError: If the document is neither a list nor an object with a
            ``jobs`` list, or if any entry is not a JSON object. Malformed
            JSON raises ``json.JSONDecodeError``, a ``ValueError`` subclass.
        OSError: If the file cannot be read.
    """

    # ``utf-8-sig`` reads plain UTF-8 unchanged and strips a leading BOM,
    # which ``json.loads`` would otherwise reject.
    payload = json.loads(Path(input_file).read_text(encoding="utf-8-sig"))
    if isinstance(payload, list):
        jobs = payload
    elif isinstance(payload, dict) and isinstance(payload.get("jobs"), list):
        jobs = payload["jobs"]
    else:
        raise ValueError("job input must be a list or an object with a jobs list")
    if not all(isinstance(job, dict) for job in jobs):
        raise ValueError("every job entry must be an object")
    return list(jobs)
134
+
135
+
136
def load_applied_job_ids(path: Path) -> set[str]:
    """Load applied job ids from an explicit JSONL path.

    Args:
        path: JSONL file with one JSON object per line; ``~`` is expanded.

    Returns:
        The set of non-empty, stripped ``job_id`` values found. A missing
        file yields an empty set. Blank lines, lines that are not valid
        JSON, and lines that are not JSON objects are skipped.
    """

    applied_path = Path(path).expanduser()
    if not applied_path.exists():
        return set()
    ids: set[str] = set()
    # Iterate the file instead of calling ``str.splitlines()``: splitlines
    # also breaks on U+2028, U+2029 and U+0085, which may appear unescaped
    # inside JSON strings and would cut a valid record in two. File
    # iteration only splits on real newlines. ``utf-8-sig`` strips a
    # leading BOM so the first record is not lost.
    with applied_path.open("r", encoding="utf-8-sig") as handle:
        for line in handle:
            if not line.strip():
                continue
            try:
                payload = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(payload, dict):
                continue
            job_id = str(payload.get("job_id") or "").strip()
            if job_id:
                ids.add(job_id)
    return ids
156
+
157
+
158
def append_applied_job_id(
    path: Path,
    job_id: str,
    *,
    metadata: dict[str, Any] | None = None,
) -> Path:
    """Append one applied job id to an explicit JSONL path.

    The record is a single JSON line holding the whitespace-normalised
    ``job_id``, a UTC ISO-8601 ``timestamp`` and, when ``metadata`` is
    non-empty, a shallow copy of it under ``metadata``. Missing parent
    directories are created.

    Returns:
        The expanded path that was written.

    Raises:
        ValueError: If ``job_id`` is empty after normalisation.
    """

    normalized_id = normalize_space(job_id)
    if not normalized_id:
        raise ValueError("job_id is required")

    destination = Path(path).expanduser()
    destination.parent.mkdir(parents=True, exist_ok=True)

    record: dict[str, Any] = {}
    record["job_id"] = normalized_id
    record["timestamp"] = datetime.now(timezone.utc).isoformat()
    if metadata:
        record["metadata"] = dict(metadata)

    with destination.open("a", encoding="utf-8") as sink:
        # Values that are not JSON-serialisable are written via ``str``.
        serialized = json.dumps(record, ensure_ascii=False, default=str)
        sink.write(serialized + "\n")
    return destination