openapply-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent/__init__.py +26 -0
- agent/batch.py +221 -0
- agent/cv_builder.py +632 -0
- agent/evaluator.py +430 -0
- agent/ollama_client.py +304 -0
- agent/portals_config.py +98 -0
- agent/prompts/apply_form.md +46 -0
- agent/prompts/compare_offers.md +23 -0
- agent/prompts/cover_letter.md +40 -0
- agent/prompts/deep_research.md +39 -0
- agent/prompts/evaluate.md +105 -0
- agent/prompts/outreach.md +35 -0
- agent/prompts/scan_query.md +38 -0
- agent/prompts/tailor_cv.md +50 -0
- agent/scan_history.py +88 -0
- agent/scanner.py +335 -0
- agent/scraper.py +415 -0
- agent/templates/cv.html +266 -0
- agent/url_utils.py +20 -0
- cli/__init__.py +1 -0
- cli/assets/config.example.yml +54 -0
- cli/assets/portals.example.yml +60 -0
- cli/commands/__init__.py +1 -0
- cli/commands/apply.py +116 -0
- cli/commands/batch.py +155 -0
- cli/commands/compare.py +130 -0
- cli/commands/doctor.py +118 -0
- cli/commands/learn.py +242 -0
- cli/commands/outreach.py +105 -0
- cli/commands/pipeline.py +27 -0
- cli/commands/portal.py +239 -0
- cli/commands/research.py +117 -0
- cli/commands/scan.py +163 -0
- cli/commands/setup.py +287 -0
- cli/commands/tracker.py +60 -0
- cli/commands/update.py +35 -0
- cli/flows/offer_pipeline.py +335 -0
- cli/main.py +214 -0
- cli/pipeline_queue.py +109 -0
- cli/tracker_store.py +105 -0
- cli/tui/tracker_app.py +305 -0
- cli/ui.py +205 -0
- memory/db.py +241 -0
- openapply-0.1.0.dist-info/METADATA +267 -0
- openapply-0.1.0.dist-info/RECORD +48 -0
- openapply-0.1.0.dist-info/WHEEL +5 -0
- openapply-0.1.0.dist-info/entry_points.txt +2 -0
- openapply-0.1.0.dist-info/top_level.txt +3 -0
agent/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Open Apply agent layer."""
|
|
2
|
+
|
|
3
|
+
from .batch import BatchProcessor, BatchRunResult, BatchTaskResult
|
|
4
|
+
from .cv_builder import CVBuildResult, CVBuilder, CVBuilderError
|
|
5
|
+
from .evaluator import EvaluationResult, JobEvaluator
|
|
6
|
+
from .ollama_client import OllamaClient, OllamaClientError
|
|
7
|
+
from .scanner import DiscoveredJob, JobScanner, ScanResult
|
|
8
|
+
from .scraper import JobScraper, ScraperError
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"OllamaClient",
|
|
12
|
+
"OllamaClientError",
|
|
13
|
+
"BatchProcessor",
|
|
14
|
+
"BatchRunResult",
|
|
15
|
+
"BatchTaskResult",
|
|
16
|
+
"JobScanner",
|
|
17
|
+
"ScanResult",
|
|
18
|
+
"DiscoveredJob",
|
|
19
|
+
"CVBuilder",
|
|
20
|
+
"CVBuildResult",
|
|
21
|
+
"CVBuilderError",
|
|
22
|
+
"EvaluationResult",
|
|
23
|
+
"JobEvaluator",
|
|
24
|
+
"JobScraper",
|
|
25
|
+
"ScraperError",
|
|
26
|
+
]
|
agent/batch.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
from __future__ import annotations

import asyncio
import inspect
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Awaitable, Callable

from sqlalchemy import desc, select
from sqlalchemy.orm import Session, sessionmaker

from memory.db import CV, Evaluation, Job

from .cv_builder import CVBuilder
from .evaluator import JobEvaluator
from .scraper import JobScraper
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Invoked once per finished URL with its BatchTaskResult; may be synchronous
# (returns None) or asynchronous (returns an awaitable the runner awaits).
ProgressCallback = Callable[["BatchTaskResult"], Awaitable[None] | None]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(slots=True)
class BatchTaskResult:
    """Outcome of processing a single job URL through the batch pipeline.

    ``status`` is one of: "succeeded", "filtered" (grade below the cutoff),
    "skipped" (already fully processed in an earlier run), or "failed".
    """

    url: str
    status: str
    job_id: int | None = None         # Job row id once the URL is persisted
    evaluation_id: int | None = None  # Evaluation row id, when one was created/found
    cv_id: int | None = None          # CV row id, when a tailored CV was built
    grade: str | None = None          # letter grade from the evaluator (A..F)
    score: float | None = None        # evaluator's weighted total score
    error: str | None = None          # exception text when status == "failed"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(slots=True)
class BatchRunResult:
    """Aggregate summary of one batch run over a list of URLs."""

    total: int      # URLs submitted to the run
    processed: int  # URLs actually worked on (== len(results))
    succeeded: int  # results with status "succeeded"
    filtered: int   # results with status "filtered" (grade below cutoff)
    skipped: int    # results with status "skipped" (resumed from earlier run)
    failed: int     # results with status "failed"
    results: list[BatchTaskResult]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class BatchProcessor:
    """Parallel scrape -> evaluate -> CV generation pipeline for URL queues."""

    def __init__(
        self,
        session_factory: sessionmaker[Session],
        scraper: JobScraper,
        evaluator: JobEvaluator,
        cv_builder: CVBuilder,
        concurrency: int = 3,
    ) -> None:
        """Store collaborators.

        ``concurrency`` is the number of parallel workers; values below 1
        are clamped to 1.
        """
        self._session_factory = session_factory
        self._scraper = scraper
        self._evaluator = evaluator
        self._cv_builder = cv_builder
        self._concurrency = max(1, concurrency)

    async def process_urls(
        self,
        urls: list[str],
        cv_content: str,
        min_grade: str = "B",
        progress_callback: ProgressCallback | None = None,
    ) -> BatchRunResult:
        """Process ``urls`` with up to ``self._concurrency`` concurrent workers.

        Each URL is scraped, evaluated against ``cv_content``, and — when its
        grade meets ``min_grade`` — gets a tailored CV built. If supplied,
        ``progress_callback`` (sync or async) is invoked once per finished
        URL with its :class:`BatchTaskResult`.
        """
        queue: asyncio.Queue[str] = asyncio.Queue()
        for url in urls:
            queue.put_nowait(url)

        results: list[BatchTaskResult] = []
        results_lock = asyncio.Lock()

        async def worker() -> None:
            # Drain the queue; exits when no URLs remain.
            while True:
                try:
                    url = queue.get_nowait()
                except asyncio.QueueEmpty:
                    return

                try:
                    result = await self._process_one(
                        url=url, cv_content=cv_content, min_grade=min_grade
                    )

                    async with results_lock:
                        results.append(result)

                    if progress_callback is not None:
                        maybe = progress_callback(result)
                        # BUGFIX: the ProgressCallback contract allows any
                        # awaitable, not just a coroutine. asyncio.iscoroutine
                        # silently dropped Futures/Tasks; isawaitable honors
                        # the declared type.
                        if inspect.isawaitable(maybe):
                            await maybe
                finally:
                    # BUGFIX: always balance get_nowait() even if the progress
                    # callback raises, so queue.join() can never deadlock.
                    queue.task_done()

        workers = [asyncio.create_task(worker()) for _ in range(self._concurrency)]
        await asyncio.gather(*workers)

        succeeded = sum(1 for row in results if row.status == "succeeded")
        filtered = sum(1 for row in results if row.status == "filtered")
        skipped = sum(1 for row in results if row.status == "skipped")
        failed = sum(1 for row in results if row.status == "failed")

        return BatchRunResult(
            total=len(urls),
            processed=len(results),
            succeeded=succeeded,
            filtered=filtered,
            skipped=skipped,
            failed=failed,
            results=results,
        )

    async def _process_one(self, url: str, cv_content: str, min_grade: str) -> BatchTaskResult:
        """Scrape, evaluate, and (grade permitting) build a CV for one URL.

        Never raises: any exception is converted into a "failed" result so
        one bad URL cannot abort the whole batch.
        """
        try:
            # Resume support: skip URLs that already completed in a prior run.
            resume_state = self._resume_state(url)
            if resume_state is not None:
                return resume_state

            jd = await self._scraper.scrape_jd(url)
            job = self._upsert_job(
                url=url,
                company=str(jd.get("company", "")).strip() or None,
                role=str(jd.get("title", "")).strip() or None,
                description=str(jd.get("description", "")).strip(),
            )

            eval_result = await self._evaluator.evaluate_job(job.id, cv_content)
            if self._grade_rank(eval_result.grade) < self._grade_rank(min_grade):
                # Below the cutoff: record the skip and stop before CV work.
                self._mark_job_skipped(job.id)
                return BatchTaskResult(
                    url=url,
                    status="filtered",
                    job_id=job.id,
                    evaluation_id=eval_result.evaluation_id,
                    grade=eval_result.grade,
                    score=eval_result.weighted_total,
                )

            if eval_result.evaluation_id is None:
                raise RuntimeError("Evaluation did not persist an evaluation_id.")

            cv_result = await self._cv_builder.build_for_job(job.id, eval_result.evaluation_id)
            return BatchTaskResult(
                url=url,
                status="succeeded",
                job_id=job.id,
                evaluation_id=eval_result.evaluation_id,
                cv_id=cv_result.cv_id,
                grade=eval_result.grade,
                score=eval_result.weighted_total,
            )
        except Exception as exc:
            return BatchTaskResult(url=url, status="failed", error=str(exc))

    def _resume_state(self, url: str) -> BatchTaskResult | None:
        """Return a "skipped" result when ``url`` already has an evaluation AND a CV.

        Returns None when the URL is unknown or only partially processed
        (e.g. evaluated but no CV), in which case the pipeline reruns from
        scraping.
        """
        with self._session_factory() as session:
            job = session.scalars(select(Job).where(Job.url == url)).first()
            if job is None:
                return None

            latest_eval = session.scalars(
                select(Evaluation).where(Evaluation.job_id == job.id).order_by(desc(Evaluation.id)).limit(1)
            ).first()
            latest_cv = session.scalars(
                select(CV).where(CV.job_id == job.id).order_by(desc(CV.id)).limit(1)
            ).first()

            if latest_eval is None:
                return None

            if latest_cv is not None:
                return BatchTaskResult(
                    url=url,
                    status="skipped",
                    job_id=job.id,
                    evaluation_id=latest_eval.id,
                    cv_id=latest_cv.id,
                    grade=latest_eval.grade,
                    score=latest_eval.score_total,
                )

            return None

    def _upsert_job(self, url: str, company: str | None, role: str | None, description: str) -> Job:
        """Insert or refresh the Job row for ``url`` and return it.

        On update, blank scraped values fall back to the previously stored
        ones (``or`` keeps the old value when the new one is empty/None).
        """
        with self._session_factory() as session:
            job = session.scalars(select(Job).where(Job.url == url)).first()
            if job is None:
                job = Job(
                    url=url,
                    company=company,
                    role=role,
                    jd_raw=description,
                    jd_extracted=description,
                    scraped_at=datetime.now(timezone.utc),
                    status="new",
                )
                session.add(job)
            else:
                job.company = company or job.company
                job.role = role or job.role
                job.jd_raw = description or job.jd_raw
                job.jd_extracted = description or job.jd_extracted
                job.scraped_at = datetime.now(timezone.utc)
                session.add(job)

            session.commit()
            # Reload attributes so the detached instance is usable after the
            # session closes.
            session.refresh(job)
            return job

    def _mark_job_skipped(self, job_id: int) -> None:
        """Set the job's status to "skipped"; silently no-op if the row is gone."""
        with self._session_factory() as session:
            job = session.get(Job, job_id)
            if job is None:
                return
            job.status = "skipped"
            session.add(job)
            session.commit()

    @staticmethod
    def _grade_rank(grade: str) -> int:
        """Map a letter grade to an ordinal (A=5 .. F=1); unknown grades rank as F."""
        order = {"A": 5, "B": 4, "C": 3, "D": 2, "F": 1}
        return order.get(grade.upper().strip(), 1)
|