finch-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- finch_cli/__init__.py +3 -0
- finch_cli/__main__.py +4 -0
- finch_cli/_data/base_resume.md +32 -0
- finch_cli/_data/job_post.txt +25 -0
- finch_cli/cli.py +155 -0
- finch_cli/fetch.py +131 -0
- finch_cli/jobs.py +166 -0
- finch_cli/output.py +20 -0
- finch_cli/score.py +166 -0
- finch_cli/storage.py +81 -0
- finch_cli/tailor.py +150 -0
- finch_cli/tui.py +782 -0
- finch_cli-0.1.1.dist-info/METADATA +140 -0
- finch_cli-0.1.1.dist-info/RECORD +17 -0
- finch_cli-0.1.1.dist-info/WHEEL +4 -0
- finch_cli-0.1.1.dist-info/entry_points.txt +2 -0
- finch_cli-0.1.1.dist-info/licenses/LICENSE +21 -0
finch_cli/__init__.py
ADDED
finch_cli/__main__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Jordan Park
|
|
2
|
+
|
|
3
|
+
jordan@example.com · linkedin.com/in/jordanpark · github.com/jordanpark · College Station, TX
|
|
4
|
+
|
|
5
|
+
## Education
|
|
6
|
+
|
|
7
|
+
**Texas A&M University**, BS Computer Science, May 2026
|
|
8
|
+
- GPA: 3.84 / 4.00
|
|
9
|
+
- Coursework: Machine Learning, Distributed Systems, Linear Algebra, Operating Systems, Databases
|
|
10
|
+
|
|
11
|
+
## Experience
|
|
12
|
+
|
|
13
|
+
**Engineering Intern, Nimbus Labs** - May 2025 to Aug 2025
|
|
14
|
+
- Shipped a distributed training pipeline that cut iteration time from 4 hours to 38 minutes
|
|
15
|
+
- Wrote a 200-test evaluation harness that flagged 3 silent regressions in the production model
|
|
16
|
+
- Owned the model rollout to staging across 14 GPUs with zero-downtime cutover
|
|
17
|
+
|
|
18
|
+
**SWE Intern, Stripe (Issuing)** - May 2024 to Aug 2024
|
|
19
|
+
- Implemented an idempotency layer for the card-issuing API, cut duplicate-charge incidents 92 percent
|
|
20
|
+
- Reviewed 28 PRs across three teams and wrote the team's first incident-runbook template
|
|
21
|
+
|
|
22
|
+
## Projects
|
|
23
|
+
|
|
24
|
+
**Tiny GPT trainer** - PyTorch, CUDA, wandb
|
|
25
|
+
From-scratch GPT-2 small implementation with FlashAttention, fully open-sourced, 1.2k GitHub stars.
|
|
26
|
+
|
|
27
|
+
**Climate dashboard** - React, D3, Tailwind
|
|
28
|
+
Interactive visualization of NOAA temperature anomalies for 8,000 monitoring stations, deployed to Cloudflare Pages.
|
|
29
|
+
|
|
30
|
+
## Skills
|
|
31
|
+
|
|
32
|
+
Python, PyTorch, CUDA, distributed training, Go, Rust, React, TypeScript, PostgreSQL, Redis, Docker, Linux
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Software Engineer Intern, Distributed Systems
|
|
2
|
+
Stripe - Remote (US / Canada)
|
|
3
|
+
|
|
4
|
+
About the team
|
|
5
|
+
|
|
6
|
+
The Distributed Systems team at Stripe owns the infrastructure that moves money across our platform. We run multi-region postgres and kafka clusters, build the storage primitives that every product team uses, and keep latency under 50ms p99 across continents. You will be joining a focused team where every change you ship affects every Stripe API call.
|
|
7
|
+
|
|
8
|
+
What you will do
|
|
9
|
+
|
|
10
|
+
- Design and implement features in core distributed systems written in Go and Rust
|
|
11
|
+
- Improve the reliability and observability of our storage layer (postgres, kafka, redis)
|
|
12
|
+
- Reduce p99 latency for high-throughput services through profiling, indexing, and caching changes
|
|
13
|
+
- Write the runbooks and instrumentation that let your team debug production issues without paging the original author
|
|
14
|
+
- Collaborate with senior engineers on incident response and write postmortems
|
|
15
|
+
|
|
16
|
+
What we look for
|
|
17
|
+
|
|
18
|
+
- Currently pursuing a BS, MS, or PhD in CS, EE, or a related field with strong systems coursework
|
|
19
|
+
- Proficient in at least one of Go, Rust, Python, or C++
|
|
20
|
+
- Experience with concurrency primitives, networking, or distributed systems through coursework or projects
|
|
21
|
+
- Comfortable reading large unfamiliar codebases and shipping non-trivial PRs
|
|
22
|
+
- Bonus: open source contributions, prior internship at a high-traffic SaaS, exposure to postgres internals, kafka, or eBPF
|
|
23
|
+
|
|
24
|
+
Compensation
|
|
25
|
+
$58 to $72 per hour, depending on year and location.
|
finch_cli/cli.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Finch CLI - entry point."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from . import __version__
|
|
12
|
+
from .fetch import FetchError, fetch_job
|
|
13
|
+
from .output import write_or_print
|
|
14
|
+
from .tailor import DEFAULT_BASE_URL, DEFAULT_MODEL, TailorError, tailor_resume
|
|
15
|
+
|
|
16
|
+
console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Root click group. The subcommands in this module attach themselves with
# @main.command(). "-h" is accepted as an alias for "--help".
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
@click.version_option(__version__, prog_name="finch")
def main() -> None:
    """Finch - tailor your resume to any job posting from your terminal.

    Run `finch tailor --help` or `finch ui --help` to get started, or visit
    https://applyfinch.com for the full web product (auto-apply included,
    free during beta).
    """
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _read_input_file(path: str, label: str) -> str:
    """Read a UTF-8 text input file, exiting with status 2 if unusable.

    ``label`` names the input in the error message ("resume" or "job").
    A file that cannot be read, is not valid UTF-8, or contains only
    whitespace is reported to the user and terminates the process.
    """
    try:
        text = Path(path).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        console.print(f"[red]Error:[/red] could not read {label} file {path}: {e}")
        sys.exit(2)
    if not text.strip():
        console.print(f"[red]Error:[/red] {label} file {path} is empty.")
        sys.exit(2)
    return text


@main.command()
@click.option(
    "--resume",
    "-r",
    "resume_path",
    required=True,
    type=click.Path(exists=True, dir_okay=False, readable=True),
    help="Path to your base resume (markdown or plain text).",
)
@click.option(
    "--job",
    "-j",
    "job_url",
    default=None,
    help="URL of the job posting.",
)
@click.option(
    "--job-file",
    "job_file",
    default=None,
    type=click.Path(exists=True, dir_okay=False, readable=True),
    help="Path to a text file containing the job posting (use if --job won't extract).",
)
@click.option(
    "--out",
    "-o",
    "out_path",
    default=None,
    type=click.Path(dir_okay=False, writable=True),
    help="Write the tailored resume to this path. If omitted, prints to terminal.",
)
@click.option(
    "--model",
    default=DEFAULT_MODEL,
    show_default=True,
    help="Model name. Any chat model on the configured base-url.",
)
@click.option(
    "--api-key",
    default=None,
    help="API key. Defaults to $DEEPSEEK_API_KEY, $FINCH_API_KEY, or $OPENAI_API_KEY.",
)
@click.option(
    "--base-url",
    default=None,
    show_default=False,
    help=f"OpenAI-compatible base URL. Default: {DEFAULT_BASE_URL} (DeepSeek). "
    "Works with OpenAI, Together, Groq, Fireworks, anything API-compatible.",
)
def tailor(
    resume_path: str,
    job_url: str | None,
    job_file: str | None,
    out_path: str | None,
    model: str,
    api_key: str | None,
    base_url: str | None,
) -> None:
    """Tailor your resume to a specific job posting."""
    # Exactly one job source is required. Usage errors exit with status 2.
    if not job_url and not job_file:
        console.print("[red]Error:[/red] provide either --job <url> or --job-file <path>.")
        sys.exit(2)
    if job_url and job_file:
        console.print("[red]Error:[/red] use only one of --job or --job-file.")
        sys.exit(2)

    base_resume = _read_input_file(resume_path, "resume")

    if job_file:
        # A blank job file is rejected the same way a blank resume is,
        # rather than being sent on to the model.
        job_text = _read_input_file(job_file, "job")
        console.print(f"[dim]Read job description from {job_file} ({len(job_text):,} chars).[/dim]")
    else:
        try:
            with console.status(f"Fetching job posting from {job_url}...", spinner="dots"):
                job_text = fetch_job(job_url)
        except FetchError as e:
            console.print(f"[red]Fetch failed:[/red] {e}")
            sys.exit(1)
        console.print(f"[dim]Fetched job posting ({len(job_text):,} chars).[/dim]")

    # The API key is sent to whatever endpoint --base-url names, so warn
    # whenever it differs from the built-in default.
    if base_url and base_url != DEFAULT_BASE_URL:
        console.print(
            f"[yellow]warning:[/] sending your API key to {base_url}. "
            f"only use --base-url for providers you trust."
        )

    try:
        with console.status("Tailoring resume against the job...", spinner="dots"):
            tailored = tailor_resume(
                base_resume,
                job_text,
                model=model,
                api_key=api_key,
                base_url=base_url,
            )
    except TailorError as e:
        console.print(f"[red]Tailoring failed:[/red] {e}")
        sys.exit(1)

    write_or_print(tailored, out_path)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@main.command()
@click.option(
    "--demo",
    is_flag=True,
    help="Pre-load the bundled example resume and job posting on launch.",
)
@click.option(
    "--model",
    default=DEFAULT_MODEL,
    show_default=True,
    help="Model name. Any chat model on the configured base-url.",
)
def ui(demo: bool, model: str) -> None:
    """Launch the Finch TUI - jobs / library / tailor in three tabs."""
    # Deferred import: the TUI module is loaded only when this subcommand runs.
    from . import tui

    tui.run(demo=demo, model=model)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# Invoke the click group when this module is executed as ``__main__``.
if __name__ == "__main__":
    main()
|
finch_cli/fetch.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Fetch a job posting URL and extract the readable content.
|
|
2
|
+
|
|
3
|
+
Defends against SSRF: only http/https schemes, refuses private / loopback /
|
|
4
|
+
link-local / multicast / reserved IP ranges. The hostname is resolved with
|
|
5
|
+
getaddrinfo so DNS-rebinding-style "evil.example -> 127.0.0.1" is caught at
|
|
6
|
+
both the original URL and after each redirect hop.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import ipaddress
|
|
12
|
+
import socket
|
|
13
|
+
from urllib.parse import urlparse
|
|
14
|
+
|
|
15
|
+
import httpx
|
|
16
|
+
import trafilatura
|
|
17
|
+
|
|
18
|
+
# Defaults for fetch_url(); each one can be overridden per call by keyword.
DEFAULT_TIMEOUT = 30.0  # handed to httpx.Client(timeout=...)
DEFAULT_MAX_BYTES = 5 * 1024 * 1024  # 5 MB cap on fetched bodies
DEFAULT_MAX_REDIRECTS = 5  # redirect hops followed before giving up
# Browser-like User-Agent header sent with every page fetch.
DEFAULT_UA = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) finch-cli/0.1 Safari/537.36"
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class FetchError(RuntimeError):
|
|
28
|
+
"""A fetch or extraction failure with a user-facing message."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _check_url_safe(url: str) -> None:
|
|
32
|
+
"""Raise FetchError if the URL points anywhere we refuse to fetch from.
|
|
33
|
+
|
|
34
|
+
- scheme must be http or https
|
|
35
|
+
- resolves the host; refuses private / loopback / link-local / multicast / reserved IPs
|
|
36
|
+
(blocks 127.0.0.1, ::1, 10/8, 172.16/12, 192.168/16, 169.254/16, etc.)
|
|
37
|
+
"""
|
|
38
|
+
parsed = urlparse(url)
|
|
39
|
+
if parsed.scheme not in ("http", "https"):
|
|
40
|
+
raise FetchError(
|
|
41
|
+
f"refusing scheme {parsed.scheme!r}: only http and https are allowed."
|
|
42
|
+
)
|
|
43
|
+
host = parsed.hostname
|
|
44
|
+
if not host:
|
|
45
|
+
raise FetchError(f"refusing url with no host: {url}")
|
|
46
|
+
try:
|
|
47
|
+
infos = socket.getaddrinfo(host, None)
|
|
48
|
+
except socket.gaierror as e:
|
|
49
|
+
raise FetchError(f"could not resolve host {host}: {e}") from e
|
|
50
|
+
for info in infos:
|
|
51
|
+
addr = info[4][0]
|
|
52
|
+
try:
|
|
53
|
+
ip = ipaddress.ip_address(addr)
|
|
54
|
+
except ValueError:
|
|
55
|
+
continue
|
|
56
|
+
if (
|
|
57
|
+
ip.is_private
|
|
58
|
+
or ip.is_loopback
|
|
59
|
+
or ip.is_link_local
|
|
60
|
+
or ip.is_multicast
|
|
61
|
+
or ip.is_reserved
|
|
62
|
+
or ip.is_unspecified
|
|
63
|
+
):
|
|
64
|
+
raise FetchError(
|
|
65
|
+
f"refusing to fetch internal/private host {host} ({addr}). "
|
|
66
|
+
f"if this is a public job page, paste the description into a file "
|
|
67
|
+
f"and use --job-file instead."
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def fetch_url(
    url: str,
    *,
    timeout: float = DEFAULT_TIMEOUT,
    max_bytes: int = DEFAULT_MAX_BYTES,
    max_redirects: int = DEFAULT_MAX_REDIRECTS,
) -> str:
    """Fetch a URL and return raw HTML. SSRF-checked at every redirect hop.

    Redirects are followed manually (at most ``max_redirects`` hops) so each
    target can be passed through ``_check_url_safe`` before it is requested.
    The body is streamed and the download is abandoned as soon as more than
    ``max_bytes`` have been received.

    Raises:
        FetchError: unsafe URL, too many redirects, oversized body, non-success
            HTTP status, or any transport-level httpx failure.
    """
    _check_url_safe(url)
    headers = {
        "User-Agent": DEFAULT_UA,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
    }
    current = url
    try:
        with httpx.Client(
            timeout=timeout, follow_redirects=False, headers=headers
        ) as client:
            for _ in range(max_redirects + 1):
                with client.stream("GET", current) as r:
                    location = r.headers.get("location")
                    if 300 <= r.status_code < 400 and location:
                        # Resolve a possibly-relative Location against the
                        # URL that produced it.
                        next_url = str(r.url.join(location))
                        _check_url_safe(next_url)
                        current = next_url
                        continue
                    r.raise_for_status()
                    # Enforce the cap while downloading instead of after the
                    # whole body is already in memory.
                    body = bytearray()
                    for chunk in r.iter_bytes():
                        body += chunk
                        if len(body) > max_bytes:
                            raise FetchError(
                                f"response too large (more than {max_bytes:,} bytes). "
                                f"aborting."
                            )
                    try:
                        return bytes(body).decode(r.encoding or "utf-8", errors="replace")
                    except LookupError:
                        # The server declared a charset Python does not know.
                        return bytes(body).decode("utf-8", errors="replace")
            raise FetchError(f"too many redirects (more than {max_redirects}).")
    except httpx.HTTPStatusError as e:
        raise FetchError(
            f"job page returned HTTP {e.response.status_code}. "
            f"it may be behind auth or geo-blocked. "
            f"try saving the description to a file and use --job-file."
        ) from e
    except httpx.HTTPError as e:
        raise FetchError(f"could not fetch job page: {e}") from e
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def extract_text(html: str) -> str:
    """Return the main readable text of an HTML page, stripped of outer whitespace.

    Raises:
        FetchError: when nothing is extracted or the stripped result is
            shorter than 80 characters.
    """
    extracted = trafilatura.extract(
        html,
        include_comments=False,
        include_tables=True,
        favor_recall=True,
    )
    cleaned = extracted.strip() if extracted else ""
    if len(cleaned) < 80:
        raise FetchError(
            "Could not extract readable content from the job page. "
            "It probably requires JavaScript to render (Workday, Greenhouse iframe, etc.). "
            "Paste the job description into a text file and pass it with --job-file instead."
        )
    return cleaned
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def fetch_job(url: str) -> str:
    """Download ``url`` and return the readable text extracted from it."""
    html = fetch_url(url)
    return extract_text(html)
|
finch_cli/jobs.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""Fetch the SimplifyJobs internship + new-grad lists.
|
|
2
|
+
|
|
3
|
+
Same two sources Finch's job bank uses. Cached locally for fast TUI reloads.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import httpx
|
|
15
|
+
|
|
16
|
+
# Feed name -> URL of the listings JSON it is downloaded from.
SOURCES: dict[str, str] = {
    "intern": "https://raw.githubusercontent.com/SimplifyJobs/Summer2026-Internships/dev/.github/scripts/listings.json",
    "newgrad": "https://raw.githubusercontent.com/SimplifyJobs/New-Grad-Positions/dev/.github/scripts/listings.json",
}

# Cache location: $XDG_CACHE_HOME/finch-cli, falling back to ~/.cache/finch-cli.
# An empty XDG_CACHE_HOME is treated as unset; otherwise the cache would land
# in a "finch-cli" directory relative to the current working directory.
CACHE_DIR = (
    Path(os.environ.get("XDG_CACHE_HOME") or (Path.home() / ".cache")) / "finch-cli"
)
CACHE_TTL_SECONDS = 6 * 60 * 60  # six hours
MAX_FEED_BYTES = 25 * 1024 * 1024  # 25 MB safety cap on feed size
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
|
|
29
|
+
class Job:
|
|
30
|
+
id: str
|
|
31
|
+
source: str
|
|
32
|
+
company: str
|
|
33
|
+
title: str
|
|
34
|
+
locations: list[str]
|
|
35
|
+
url: str
|
|
36
|
+
terms: list[str]
|
|
37
|
+
sponsorship: str | None
|
|
38
|
+
active: bool
|
|
39
|
+
date_posted: int
|
|
40
|
+
date_updated: int
|
|
41
|
+
category: str | None
|
|
42
|
+
degrees: list[str] = field(default_factory=list)
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def from_json(cls, d: dict, source: str) -> "Job":
|
|
46
|
+
return cls(
|
|
47
|
+
id=str(d.get("id", "")),
|
|
48
|
+
source=source,
|
|
49
|
+
company=d.get("company_name") or "",
|
|
50
|
+
title=d.get("title") or "",
|
|
51
|
+
locations=list(d.get("locations") or []),
|
|
52
|
+
url=d.get("url") or "",
|
|
53
|
+
terms=list(d.get("terms") or []),
|
|
54
|
+
sponsorship=d.get("sponsorship"),
|
|
55
|
+
active=bool(d.get("active", False)),
|
|
56
|
+
date_posted=int(d.get("date_posted") or 0),
|
|
57
|
+
date_updated=int(d.get("date_updated") or 0),
|
|
58
|
+
category=d.get("category"),
|
|
59
|
+
degrees=list(d.get("degrees") or []),
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
def age_label(self) -> str:
|
|
63
|
+
if not self.date_posted:
|
|
64
|
+
return "?"
|
|
65
|
+
delta = datetime.now(timezone.utc).timestamp() - self.date_posted
|
|
66
|
+
days = int(delta // 86400)
|
|
67
|
+
if days < 1:
|
|
68
|
+
hours = int(delta // 3600)
|
|
69
|
+
return f"{max(hours, 0)}h"
|
|
70
|
+
if days < 30:
|
|
71
|
+
return f"{days}d"
|
|
72
|
+
months = days // 30
|
|
73
|
+
return f"{months}mo"
|
|
74
|
+
|
|
75
|
+
def loc_label(self) -> str:
|
|
76
|
+
if not self.locations:
|
|
77
|
+
return "-"
|
|
78
|
+
if len(self.locations) == 1:
|
|
79
|
+
return self.locations[0]
|
|
80
|
+
return f"{self.locations[0]} +{len(self.locations) - 1}"
|
|
81
|
+
|
|
82
|
+
def term_label(self) -> str:
|
|
83
|
+
return ", ".join(self.terms) if self.terms else "-"
|
|
84
|
+
|
|
85
|
+
def sponsorship_label(self) -> str:
|
|
86
|
+
s = (self.sponsorship or "").strip()
|
|
87
|
+
if not s:
|
|
88
|
+
return "?"
|
|
89
|
+
# Compact known buckets
|
|
90
|
+
sl = s.lower()
|
|
91
|
+
if "u.s. citizen" in sl or "us citizen" in sl:
|
|
92
|
+
return "US only"
|
|
93
|
+
if "offered" in sl:
|
|
94
|
+
return "sponsors"
|
|
95
|
+
if "no sponsorship" in sl or "does not offer" in sl:
|
|
96
|
+
return "no spon"
|
|
97
|
+
return s[:10]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _cache_path(source: str) -> Path:
    """Return the cache file for ``source``, creating the cache directory if needed."""
    directory = CACHE_DIR
    directory.mkdir(parents=True, exist_ok=True)
    return directory.joinpath(f"{source}.json")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _is_fresh(p: Path) -> bool:
|
|
106
|
+
if not p.exists():
|
|
107
|
+
return False
|
|
108
|
+
age = datetime.now(timezone.utc).timestamp() - p.stat().st_mtime
|
|
109
|
+
return age < CACHE_TTL_SECONDS
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _parse_feed(text: str) -> list | None:
    """Parse feed text; return the decoded list, or None if it is not a JSON list."""
    try:
        data = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    return data if isinstance(data, list) else None


def load_jobs(*, force_refresh: bool = False) -> list[Job]:
    """Return active jobs from both sources, sorted by date_posted desc.

    Each source is downloaded when its cache file is missing, older than the
    TTL, or ``force_refresh`` is set. A download replaces the cache only if
    it is within MAX_FEED_BYTES and parses as a JSON list, so a bad upstream
    response cannot overwrite a good cache. When the download is rejected,
    the existing cache file (if any) is used instead.

    Raises:
        httpx.HTTPError: the download failed and either ``force_refresh`` was
            requested or there is no cached copy to fall back on.
    """
    all_jobs: list[Job] = []
    for source, url in SOURCES.items():
        cache = _cache_path(source)
        data: list | None = None
        if force_refresh or not _is_fresh(cache):
            try:
                with httpx.Client(timeout=30, follow_redirects=True) as client:
                    r = client.get(url, headers={"User-Agent": "finch-cli/0.1"})
                    r.raise_for_status()
            except httpx.HTTPError:
                # An explicit refresh must report failure, and with nothing
                # cached there is no fallback. Otherwise serve the stale cache.
                if force_refresh or not cache.exists():
                    raise
            else:
                if len(r.content) <= MAX_FEED_BYTES:
                    data = _parse_feed(r.text)
                if data is not None:
                    cache.write_text(r.text, encoding="utf-8")
        if data is None:
            try:
                data = _parse_feed(cache.read_text(encoding="utf-8"))
            except (OSError, UnicodeDecodeError):
                continue
        if data is None:
            continue
        for d in data:
            if not isinstance(d, dict) or not d.get("id"):
                continue
            try:
                all_jobs.append(Job.from_json(d, source))
            except Exception:
                # one malformed record shouldn't kill the whole feed
                continue
    active = [j for j in all_jobs if j.active]
    active.sort(key=lambda j: j.date_posted, reverse=True)
    return active
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def filter_jobs(
    jobs: list[Job],
    *,
    query: str = "",
    source: str | None = None,
    term_substring: str | None = None,
) -> list[Job]:
    """Return the jobs matching every supplied filter, in their original order.

    ``source`` must equal the job's source exactly. ``term_substring`` must
    occur, case-insensitively, in at least one of the job's terms. ``query``
    is trimmed and matched case-insensitively against the title, company,
    locations and category joined by spaces. Empty or None filters are ignored.
    """
    needle = query.lower().strip()
    term_needle = term_substring.lower() if term_substring else None

    def _matches(job: Job) -> bool:
        if source and job.source != source:
            return False
        if term_needle is not None and not any(
            term_needle in term.lower() for term in job.terms
        ):
            return False
        if not needle:
            return True
        haystack = (
            f"{job.title} {job.company} {' '.join(job.locations)} {job.category or ''}"
        ).lower()
        return needle in haystack

    return [job for job in jobs if _matches(job)]
|
finch_cli/output.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Output helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.markdown import Markdown
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def write_or_print(content: str, out_path: str | None) -> None:
    """Write ``content`` to ``out_path``, or render it as Markdown on the terminal.

    When a path is given the text is saved as UTF-8 and a one-line summary
    (character and line counts) is printed; otherwise nothing is written.
    """
    console = Console()
    if not out_path:
        console.print(Markdown(content))
        return

    Path(out_path).write_text(content, encoding="utf-8")
    char_count = len(content)
    line_count = len(content.splitlines())
    console.print(
        f"[green]wrote[/green] {out_path} ({char_count:,} chars, {line_count:,} lines)"
    )
|