linkedin-apply-assistant 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/ISSUE_TEMPLATE/bug_report.yml +72 -0
- package/.github/ISSUE_TEMPLATE/config.yml +5 -0
- package/.github/ISSUE_TEMPLATE/config_help.yml +49 -0
- package/.github/ISSUE_TEMPLATE/docs.yml +40 -0
- package/.github/ISSUE_TEMPLATE/feature_request.yml +45 -0
- package/.github/ISSUE_TEMPLATE/safety_compliance.yml +48 -0
- package/.github/PULL_REQUEST_TEMPLATE.md +43 -0
- package/CHANGELOG.md +47 -0
- package/CODE_OF_CONDUCT.md +47 -0
- package/CONTRIBUTING.md +64 -0
- package/GOVERNANCE.md +41 -0
- package/LEGAL.md +38 -0
- package/LICENSE +22 -0
- package/MIGRATION.md +50 -0
- package/README.md +167 -0
- package/RELEASE_CHECKLIST.md +454 -0
- package/SAFETY.md +33 -0
- package/SECURITY.md +37 -0
- package/SUPPORT.md +44 -0
- package/THIRD_PARTY_NOTICES.md +67 -0
- package/bin/linkedin-apply-assistant.mjs +95 -0
- package/configs/config.example.yml +24 -0
- package/configs/qa_bank.example.yml +35 -0
- package/docs/apply.md +40 -0
- package/docs/assist.md +35 -0
- package/docs/browser-session.md +45 -0
- package/docs/ci-and-release-policy.md +105 -0
- package/docs/commands.md +176 -0
- package/docs/install-and-configuration.md +265 -0
- package/docs/registry-publication-strategy.md +169 -0
- package/docs/reports.md +35 -0
- package/docs/search.md +39 -0
- package/docs/troubleshooting.md +57 -0
- package/examples/dry_run_input.example.json +25 -0
- package/examples/reports/apply-audit.example.json +31 -0
- package/examples/reports/search-report.example.json +40 -0
- package/install.ps1 +178 -0
- package/package.json +59 -0
- package/pyproject.toml +51 -0
- package/src/linkedin_apply_assistant/__init__.py +8 -0
- package/src/linkedin_apply_assistant/apply_reports.py +229 -0
- package/src/linkedin_apply_assistant/ats_handlers.py +217 -0
- package/src/linkedin_apply_assistant/browser_sessions.py +155 -0
- package/src/linkedin_apply_assistant/cli.py +570 -0
- package/src/linkedin_apply_assistant/config.py +109 -0
- package/src/linkedin_apply_assistant/contracts.py +255 -0
- package/src/linkedin_apply_assistant/form_engine.py +180 -0
- package/src/linkedin_apply_assistant/linkedin_layer.py +436 -0
- package/src/linkedin_apply_assistant/page_actions.py +110 -0
- package/src/linkedin_apply_assistant/page_selectors.py +88 -0
- package/src/linkedin_apply_assistant/paths.py +135 -0
- package/src/linkedin_apply_assistant/qa_bank.py +352 -0
- package/src/linkedin_apply_assistant/redaction.py +119 -0
- package/src/linkedin_apply_assistant/safety.py +230 -0
- package/src/linkedin_apply_assistant/workflows.py +435 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""Stable workflow contracts for the standalone assistant."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Mapping, Protocol, Sequence
|
|
8
|
+
|
|
9
|
+
from .form_engine import DetectionResult, FillResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class JobRecord:
    """Immutable bundle of plain values describing one LinkedIn job for workflow reports.

    Every field carries a default, so ``JobRecord()`` is a valid, empty record.
    """

    # Text fields; all default to the empty string unless noted.
    job_id: str = ""
    title: str = ""
    company: str = ""
    url: str = ""
    location: str = ""
    # The only text field with a non-empty default.
    source: str = "linkedin"
    search_url: str = ""
    ats: str = ""
    # A new dict is created for each instance (never shared between records).
    raw: dict[str, Any] = field(default_factory=dict)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(frozen=True)
class SearchRequest:
    """Frozen input bundle for a search-only workflow run.

    All fields are optional: ``SearchRequest()`` yields a limit of 10, no search URL,
    query, or location, and an empty profile.
    """

    limit: int = 10
    # Optional text inputs default to None rather than "".
    search_url: str | None = None
    query: str | None = None
    location: str | None = None
    # A new dict is created for each instance.
    profile: dict[str, Any] = field(default_factory=dict)
    # Typed as ``Any``; the concrete type is not visible from this module.
    paths: Any = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass(frozen=True)
class SurfaceIdentity:
    """Frozen identity of a visible application surface, built from five text fields."""

    url: str = ""
    title: str = ""
    surface: str = ""
    ats: str = ""
    job_id: str = ""

    def key(self) -> str:
        """Return the fields joined with ``|`` as a compact fill-once deduplication key.

        The order is url, title, surface, ats, job_id.
        """
        parts = (self.url, self.title, self.surface, self.ats, self.job_id)
        return "|".join(parts)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(frozen=True)
class ReportArtifact:
    """Frozen description of a single report file produced by a workflow.

    Both fields are required; neither has a default.
    """

    kind: str  # label for the report
    path: Path  # location of the report file
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class SearchResult:
    """Mutable container for search workflow output and its report metadata."""

    command: str = "search"
    timestamp: str = ""
    search_url: str = ""
    # Each collection below is created fresh for every instance.
    jobs: list[JobRecord] = field(default_factory=list)
    events: list[dict[str, Any]] = field(default_factory=list)
    summary: dict[str, Any] = field(default_factory=dict)
    reports: list[ReportArtifact] = field(default_factory=list)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass(frozen=True)
class AssistEvent:
    """Frozen record of one event emitted by the assistive fill workflow.

    ``type`` is the only required field; everything else has a default.
    """

    type: str
    surface: str = ""
    ats: str = ""
    status: str = ""
    # Integer counters, all starting at zero.
    filled_count: int = 0
    required_empty_count: int = 0
    unknown_count: int = 0
    reached_submit_step: bool = False
    blocked_reason: str = ""
    # A new dict is created for each instance.
    job: dict[str, Any] = field(default_factory=dict)
    identity: SurfaceIdentity | None = None
    # New lists are created for each instance.
    required_empty: list[Any] = field(default_factory=list)
    unknown_questions: list[Any] = field(default_factory=list)
    timestamp: str = ""
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@dataclass(frozen=True)
class AssistRequest:
    """Frozen input bundle for a visible-browser assistive fill session.

    All fields are optional; defaults are mode ``"auto"`` and a single cycle.
    """

    start_url: str | None = None
    mode: str = "auto"
    max_cycles: int = 1
    # Each mapping below is created fresh for every instance.
    profile: dict[str, Any] = field(default_factory=dict)
    qa_context: dict[str, Any] = field(default_factory=dict)
    documents: dict[str, Any] = field(default_factory=dict)
    # Typed as ``Any``; the concrete type is not visible from this module.
    paths: Any = None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass
class AssistResult:
    """Mutable container for assist workflow output and its report metadata."""

    command: str = "assist"
    timestamp: str = ""
    # Each collection below is created fresh for every instance.
    events: list[AssistEvent] = field(default_factory=list)
    summary: dict[str, Any] = field(default_factory=dict)
    reports: list[ReportArtifact] = field(default_factory=list)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass(frozen=True)
class SubmissionResult:
    """Frozen submission policy decision.

    The defaults describe a denied decision: status ``"disabled"``, ``allowed`` False,
    and a reason stating that browser submission is disabled.
    """

    status: str = "disabled"
    reason: str = "Browser submission is disabled in this package boundary."
    allowed: bool = False
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@dataclass(frozen=True)
class SubmitDecision:
    """Frozen, plain-value audit record for an explicit submit/apply boundary.

    Every field has a default; a bare ``SubmitDecision()`` is not allowed and has
    status ``"disabled"``.
    """

    timestamp: str = ""
    command: str = ""
    policy: str = ""
    action: str = ""
    # Decision outcome.
    allowed: bool = False
    status: str = "disabled"
    reason: str = ""
    # Remaining text fields, all empty by default.
    company: str = ""
    role: str = ""
    url: str = ""
    domain: str = ""
    ats: str = ""
    confirmation_state: str = ""
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class BrowserSession(Protocol):
    """Structural interface for a visible browser session.

    Declares a ``pages`` property plus ``open_url`` and ``close``. The bodies here are
    placeholders; conforming classes supply the behaviour.
    """

    @property
    def pages(self) -> Sequence[Any]:
        """Pages or tabs known to the session."""
        ...

    def open_url(self, url: str) -> None:
        """Open ``url`` in the visible session."""
        ...

    def close(self) -> None:
        """Close the session when the caller owns it."""
        ...
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class BrowserSessionFactory(Protocol):
    """Structural interface for objects that create visible browser sessions."""

    def open(self, request: AssistRequest | SearchRequest) -> BrowserSession:
        """Open a session for the given assist or search request."""
        ...
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class LinkedInDiscovery(Protocol):
    """Structural interface for LinkedIn search-result discovery."""

    def discover(self, request: SearchRequest) -> Sequence[JobRecord | Mapping[str, Any]]:
        """Return candidate jobs for ``request`` as ``JobRecord`` objects or mappings."""
        ...
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class ApplySurfaceDetector(Protocol):
    """Structural interface for detecting the current apply surface."""

    def detect(self, session: BrowserSession) -> DetectionResult:
        """Detect the currently fillable application surface in ``session``."""
        ...
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class FillAdapter(Protocol):
    """Structural interface for fill-only adapters."""

    def fill(
        self,
        detection: DetectionResult,
        profile: dict[str, Any],
        bank: Any = None,
        qa_context: dict[str, Any] | None = None,
        documents: dict[str, Any] | None = None,
    ) -> FillResult:
        """Fill the detected surface without performing submission.

        ``bank``, ``qa_context`` and ``documents`` are optional and default to ``None``.
        """
        ...
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class QAMatcher(Protocol):
    """Structural interface for question-and-answer matching."""

    def find_answer(
        self,
        question_text: str,
        field_type: str | None = None,
        context: dict[str, Any] | None = None,
    ) -> dict[str, Any] | None:
        """Look up an answer for ``question_text``; the declared return allows ``None``."""
        ...

    def log_pending(
        self,
        question_text: str,
        context: dict[str, Any] | None = None,
        field_type: str | None = None,
        is_required: bool = False,
    ) -> dict[str, Any]:
        """Record an unknown question.

        Note that ``context`` precedes ``field_type`` here, the reverse of
        ``find_answer``.
        """
        ...
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class SubmissionPolicy(Protocol):
    """Structural interface for submission policy decisions."""

    def decide(
        self,
        action: str,
        context: dict[str, Any] | None = None,
    ) -> SubmissionResult:
        """Return the decision on whether ``action`` is allowed."""
        ...
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class ReportSink(Protocol):
    """Structural interface for writing workflow reports."""

    def write(self, command: str, report: dict[str, Any]) -> Sequence[ReportArtifact]:
        """Write the report artifacts for a workflow ``command`` and return them."""
        ...
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# Public names exported by this module, kept in alphabetical order.
__all__ = [
    "ApplySurfaceDetector",
    "AssistEvent",
    "AssistRequest",
    "AssistResult",
    "BrowserSession",
    "BrowserSessionFactory",
    "FillAdapter",
    "JobRecord",
    "LinkedInDiscovery",
    "QAMatcher",
    "ReportArtifact",
    "ReportSink",
    "SearchRequest",
    "SearchResult",
    "SubmissionPolicy",
    "SubmissionResult",
    "SubmitDecision",
    "SurfaceIdentity",
]
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Import-safe form primitives for the standalone assistant."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import re
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Captured once at import time so ``safe_print`` always reaches the builtin.
_real_print = print


def safe_print(*args: object, **kwargs: object) -> None:
    """Print text while tolerating narrow console encodings.

    The whole output line (arguments joined by ``sep`` and terminated by ``end``) is
    rendered first and handed to ``print`` as one string. Printing argument by
    argument would leave the already-written arguments on the stream when a later one
    fails to encode, and every retry would then repeat that prefix. If the stream
    rejects the line with ``UnicodeEncodeError``, it is re-sent once with every
    non-ASCII character replaced by ``?``.

    Args:
        *args: Objects to print. Each is converted with ``str``; ``repr`` is used
            instead when ``str`` itself raises ``UnicodeEncodeError``.
        **kwargs: Keyword arguments accepted by ``print`` (``sep``, ``end``,
            ``file``, ``flush``).

    Raises:
        TypeError: If ``sep`` or ``end`` is neither ``None`` nor a string, or if
            ``print`` rejects another keyword argument.
    """

    options = dict(kwargs)
    sep = options.pop("sep", None)
    end = options.pop("end", None)
    # Mirror print(): None selects the default separator / terminator.
    sep = " " if sep is None else sep
    end = "\n" if end is None else end
    if not isinstance(sep, str) or not isinstance(end, str):
        raise TypeError("sep and end must be None or a string")

    pieces = []
    for arg in args:
        try:
            pieces.append(str(arg))
        except UnicodeEncodeError:
            pieces.append(repr(arg))
    line = sep.join(pieces) + end

    try:
        # sep/end are already folded into ``line``; emit it in a single write.
        _real_print(line, sep="", end="", **options)
    except UnicodeEncodeError:
        ascii_line = line.encode("ascii", errors="replace").decode("ascii")
        _real_print(ascii_line, sep="", end="", **options)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class FillResult:
    """Mutable, submit-free result shared by the package's fill-only surfaces."""

    # Each list below is created fresh for every instance.
    filled: list[Any] = field(default_factory=list)
    required_empty: list[Any] = field(default_factory=list)
    unknown_questions: list[Any] = field(default_factory=list)
    reached_submit_step: bool = False
    surface: str = ""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class DetectionResult:
    """Result of read-only detection of the currently visible application surface.

    The dataclass itself is not declared frozen, so instances can be mutated.
    """

    # Defaults to the string "none" (not the ``None`` object).
    surface: str = "none"
    # Typed as ``Any``; the concrete page type is not visible from this module.
    page: Any = None
    ats: str = ""
    # A new dict is created for each instance.
    job_context: dict[str, Any] = field(default_factory=dict)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ATS identifier -> regex fragments that are searched (unanchored) anywhere in a URL.
# Dict order matters: ``detect_ats`` returns the first identifier with a matching pattern.
ATS_PATTERNS: dict[str, tuple[str, ...]] = {
    "greenhouse": (
        r"boards\.greenhouse\.io",
        r"job-boards\.greenhouse\.io",
        r"greenhouse\.io/embed",
    ),
    "lever": (
        r"jobs\.lever\.co",
        r"lever\.co/[a-z0-9-]+/",
    ),
    "ashby": (
        r"jobs\.ashbyhq\.com",
        r"ashbyhq\.com",
    ),
    "workday": (
        r"\.myworkdayjobs\.com",
        r"workday\.com",
    ),
    "smartrecruiters": (
        r"smartrecruiters\.com",
        r"jobs\.smartrecruiters\.com",
    ),
    "recruitee": (r"\.recruitee\.com",),
    "workable": (
        r"apply\.workable\.com",
        r"jobs\.workable\.com",
    ),
    "bamboohr": (r"\.bamboohr\.com",),
    "icims": (r"\.icims\.com",),
    "taleo": (r"\.taleo\.net",),
    "successfactors": (
        r"\.successfactors\.com",
        r"jobs\.sap\.com",
    ),
    "personio": (
        r"\.personio\.de",
        r"jobs\.personio\.com",
    ),
    "teamtailor": (r"\.teamtailor\.com",),
    "jobvite": (
        r"jobs\.jobvite\.com",
        r"\.jobvite\.com",
    ),
    "resumator": (r"applytojob\.com",),
}


def detect_ats(url: str) -> str:
    """Return the known ATS identifier for a URL, or ``unknown``.

    Patterns are searched case-insensitively anywhere in the URL, and the first
    identifier in ``ATS_PATTERNS`` with a matching pattern wins. A falsy ``url``
    (``None`` or ``""``) is treated as the empty string.
    """

    haystack = url or ""
    for name, regexes in ATS_PATTERNS.items():
        if any(re.search(regex, haystack, re.IGNORECASE) for regex in regexes):
            return name
    return "unknown"
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def normalize_space(value: Any) -> str:
    """Collapse whitespace runs to single spaces and trim both ends.

    ``value`` is converted with ``str``; any falsy value (``None``, ``""``, ``0``,
    ``False``) is treated as the empty string.
    """

    text = str(value or "")
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def load_jobs(input_file: Path) -> list[dict[str, Any]]:
    """Load job objects from a UTF-8 JSON file at an explicit path.

    The document must be either a list of objects or an object whose ``jobs`` key
    holds such a list.

    Returns:
        A new list containing the job objects.

    Raises:
        ValueError: If the document has neither accepted shape, or if any job entry
            is not an object.
    """

    document = json.loads(Path(input_file).read_text(encoding="utf-8"))
    # For an object, the candidate list lives under "jobs"; otherwise it is the document.
    entries = document.get("jobs") if isinstance(document, dict) else document
    if not isinstance(entries, list):
        raise ValueError("job input must be a list or an object with a jobs list")
    for entry in entries:
        if not isinstance(entry, dict):
            raise ValueError("every job entry must be an object")
    return list(entries)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def load_applied_job_ids(path: Path) -> set[str]:
    """Load applied job ids from an explicit JSONL path.

    The file is read line by line, so records are split only at real line endings.
    ``str.splitlines()`` would also split on characters such as U+2028, U+2029 and
    U+0085. Those can appear unescaped inside a JSON string written with
    ``ensure_ascii=False``; the record would then be cut in two and its job id lost.

    Blank lines, lines that are not valid JSON, and JSON values that are not objects
    are skipped. The ``job_id`` value is converted with ``str`` and stripped; empty
    or falsy ids are ignored.

    Args:
        path: Location of the JSONL file; ``~`` is expanded.

    Returns:
        The set of non-empty job ids found, or an empty set when the path does not
        exist.
    """

    applied_path = Path(path).expanduser()
    if not applied_path.exists():
        return set()

    ids: set[str] = set()
    with applied_path.open(encoding="utf-8") as handle:
        for line in handle:
            if not line.strip():
                continue
            try:
                payload = json.loads(line)
            except json.JSONDecodeError:
                # Best-effort reader: a corrupt line must not hide the valid ones.
                continue
            if not isinstance(payload, dict):
                continue
            job_id = str(payload.get("job_id") or "").strip()
            if job_id:
                ids.add(job_id)
    return ids
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def append_applied_job_id(
    path: Path,
    job_id: str,
    *,
    metadata: dict[str, Any] | None = None,
) -> Path:
    """Append a single applied-job record to a JSONL file at an explicit path.

    The record holds the whitespace-normalized ``job_id``, a UTC ISO-8601
    ``timestamp``, and a shallow copy of ``metadata`` when it is non-empty. Missing
    parent directories are created. Values that JSON cannot encode natively are
    written via ``str``.

    Args:
        path: Destination JSONL file; ``~`` is expanded.
        job_id: Identifier to record.
        metadata: Optional extra values stored under the ``metadata`` key.

    Returns:
        The expanded path that was written to.

    Raises:
        ValueError: If ``job_id`` is empty after normalization.
    """

    normalized_id = normalize_space(job_id)
    if normalized_id == "":
        raise ValueError("job_id is required")

    destination = Path(path).expanduser()
    destination.parent.mkdir(parents=True, exist_ok=True)

    record: dict[str, Any] = {
        "job_id": normalized_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    if metadata:
        record["metadata"] = dict(metadata)

    with destination.open("a", encoding="utf-8") as stream:
        serialized = json.dumps(record, ensure_ascii=False, default=str)
        stream.write(serialized + "\n")
    return destination
|