jobfeeds 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jobfeeds-0.1.0/.github/workflows/release.yml +36 -0
- jobfeeds-0.1.0/.github/workflows/test.yml +22 -0
- jobfeeds-0.1.0/.gitignore +11 -0
- jobfeeds-0.1.0/CHANGELOG.md +20 -0
- jobfeeds-0.1.0/LICENSE +21 -0
- jobfeeds-0.1.0/PKG-INFO +139 -0
- jobfeeds-0.1.0/README.md +111 -0
- jobfeeds-0.1.0/pyproject.toml +46 -0
- jobfeeds-0.1.0/src/jobfeeds/__init__.py +37 -0
- jobfeeds-0.1.0/src/jobfeeds/_text.py +34 -0
- jobfeeds-0.1.0/src/jobfeeds/fetch.py +45 -0
- jobfeeds-0.1.0/src/jobfeeds/models.py +58 -0
- jobfeeds-0.1.0/src/jobfeeds/py.typed +0 -0
- jobfeeds-0.1.0/src/jobfeeds/sources/__init__.py +17 -0
- jobfeeds-0.1.0/src/jobfeeds/sources/arbeitnow.py +81 -0
- jobfeeds-0.1.0/src/jobfeeds/sources/base.py +20 -0
- jobfeeds-0.1.0/src/jobfeeds/sources/greenhouse.py +121 -0
- jobfeeds-0.1.0/src/jobfeeds/sources/hn_hiring.py +135 -0
- jobfeeds-0.1.0/src/jobfeeds/sources/remoteok.py +68 -0
- jobfeeds-0.1.0/src/jobfeeds/sources/remotive.py +65 -0
- jobfeeds-0.1.0/src/jobfeeds/sources/wwr.py +100 -0
- jobfeeds-0.1.0/tests/test_fetch_all.py +62 -0
- jobfeeds-0.1.0/tests/test_sources.py +235 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ["v*"]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
test:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v4
|
|
12
|
+
- uses: actions/setup-python@v5
|
|
13
|
+
with:
|
|
14
|
+
python-version: "3.12"
|
|
15
|
+
- name: Install
|
|
16
|
+
run: pip install -e .[dev]
|
|
17
|
+
- name: Test
|
|
18
|
+
run: pytest -q
|
|
19
|
+
|
|
20
|
+
publish:
|
|
21
|
+
needs: test
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
environment: pypi
|
|
24
|
+
permissions:
|
|
25
|
+
id-token: write # PyPI Trusted Publishing (OIDC), no long-lived token
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
- uses: actions/setup-python@v5
|
|
29
|
+
with:
|
|
30
|
+
python-version: "3.12"
|
|
31
|
+
- name: Build
|
|
32
|
+
run: |
|
|
33
|
+
pip install build
|
|
34
|
+
python -m build
|
|
35
|
+
- name: Publish to PyPI
|
|
36
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: test
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
matrix:
|
|
13
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
- uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: ${{ matrix.python-version }}
|
|
19
|
+
- name: Install
|
|
20
|
+
run: pip install -e .[dev]
|
|
21
|
+
- name: Test
|
|
22
|
+
run: pytest -q
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to jobfeeds are documented here. The format follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/) and the project adheres to
|
|
5
|
+
[Semantic Versioning](https://semver.org/).
|
|
6
|
+
|
|
7
|
+
## [0.1.0] - 2026-07-04
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- Six async adapters behind one interface: `Remotive`, `Arbeitnow`,
|
|
12
|
+
`Greenhouse`, `RemoteOK`, `WeWorkRemotely`, `HNWhoIsHiring`.
|
|
13
|
+
- `Posting` pydantic model normalized across sources, with `posting_hash()`
|
|
14
|
+
for cross-source dedupe.
|
|
15
|
+
- `fetch_all()` with per-source failure isolation (`SourceResult`).
|
|
16
|
+
- Per-adapter configuration via constructor args: Greenhouse board tokens,
|
|
17
|
+
per-board cap and optional title pattern; Arbeitnow page count;
|
|
18
|
+
We Work Remotely category feeds; timeout and user agent everywhere.
|
|
19
|
+
- `py.typed` marker; the package is fully typed.
|
|
20
|
+
- Test suite with all HTTP traffic mocked (respx); no live calls in CI.
|
jobfeeds-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nandor Nagy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
jobfeeds-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: jobfeeds
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: One typed, async Python interface to six public job boards: Remotive, Arbeitnow, Greenhouse, RemoteOK, We Work Remotely, and Hacker News Who is hiring
|
|
5
|
+
Project-URL: Homepage, https://github.com/n3ndor/jobfeeds
|
|
6
|
+
Project-URL: Repository, https://github.com/n3ndor/jobfeeds
|
|
7
|
+
Project-URL: Changelog, https://github.com/n3ndor/jobfeeds/blob/main/CHANGELOG.md
|
|
8
|
+
Project-URL: Used in production by, https://jobradar.nagysolution.com
|
|
9
|
+
Author-email: Nandor Nagy <n3ndor@gmail.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: api,async,greenhouse,hackernews,job-board,jobs,remoteok,remotive
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Requires-Dist: httpx>=0.27
|
|
22
|
+
Requires-Dist: pydantic>=2.5
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
<div align="center">
|
|
30
|
+
|
|
31
|
+
# 📡 jobfeeds
|
|
32
|
+
|
|
33
|
+
**One typed, async Python interface to six public job boards.**
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
pip install jobfeeds
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+

|
|
40
|
+

|
|
41
|
+

|
|
42
|
+

|
|
43
|
+

|
|
44
|
+
|
|
45
|
+
</div>
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
Fetching job postings should not require writing six API clients. jobfeeds
|
|
50
|
+
gives you Remotive, Arbeitnow, Greenhouse boards, RemoteOK, We Work Remotely,
|
|
51
|
+
and the Hacker News "Who is hiring" thread through one interface, normalized
|
|
52
|
+
into one pydantic model, with failures isolated per source.
|
|
53
|
+
|
|
54
|
+
Extracted from, and used in production by,
|
|
55
|
+
[**JobRadar**](https://jobradar.nagysolution.com), a live job-market
|
|
56
|
+
intelligence dashboard.
|
|
57
|
+
|
|
58
|
+
## Quickstart
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import asyncio
|
|
62
|
+
from jobfeeds import Remotive, Greenhouse, fetch_all
|
|
63
|
+
|
|
64
|
+
async def main():
|
|
65
|
+
# One source
|
|
66
|
+
jobs = await Remotive().fetch() # list[Posting]
|
|
67
|
+
print(jobs[0].company, jobs[0].title, jobs[0].url)
|
|
68
|
+
|
|
69
|
+
# Configured source
|
|
70
|
+
jobs = await Greenhouse(boards=["stripe", "anthropic"]).fetch()
|
|
71
|
+
|
|
72
|
+
# Everything, concurrently, failure-isolated
|
|
73
|
+
results = await fetch_all() # list[SourceResult]
|
|
74
|
+
for r in results:
|
|
75
|
+
print(r.source, len(r.postings) if r.ok else f"FAILED: {r.error}")
|
|
76
|
+
|
|
77
|
+
asyncio.run(main())
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## The model
|
|
81
|
+
|
|
82
|
+
Every source normalizes into the same `Posting`:
|
|
83
|
+
|
|
84
|
+
| Field | Notes |
|
|
85
|
+
| --- | --- |
|
|
86
|
+
| `source` | adapter name (`"remotive"`, `"greenhouse"`, ...) |
|
|
87
|
+
| `external_id` | the source's own id |
|
|
88
|
+
| `company`, `title`, `url` | always present |
|
|
89
|
+
| `location_raw` | as the source states it, unparsed |
|
|
90
|
+
| `posted_at` | `datetime` or `None`, when the source provides one |
|
|
91
|
+
| `raw` | source-specific payload; descriptions truncated to 8,000 chars |
|
|
92
|
+
| `.hash` | cross-source dedupe key (normalized company + title + location) |
|
|
93
|
+
|
|
94
|
+
Boards syndicate each other's postings; `posting_hash()` collapses the
|
|
95
|
+
duplicates without trusting any board-specific id.
|
|
96
|
+
|
|
97
|
+
## Sources and their quirks
|
|
98
|
+
|
|
99
|
+
| Adapter | Config | Worth knowing |
|
|
100
|
+
| --- | --- | --- |
|
|
101
|
+
| `Remotive()` | | Curated remote listings, JSON API |
|
|
102
|
+
| `Arbeitnow(pages=3)` | page count | DACH-market coverage; paginates explicitly because the API's own `links.next` can point at filtered views |
|
|
103
|
+
| `Greenhouse(boards=[...], per_board_limit=40, title_pattern=None)` | board tokens, cap, optional title regex | Direct-employer postings (Stripe, Anthropic, GitLab, ...). No global search endpoint exists, you choose boards. A default starter set is included |
|
|
104
|
+
| `RemoteOK()` | | Their terms require a visible link back to remoteok.com in what you build. The API's first element is a legal notice, already skipped |
|
|
105
|
+
| `WeWorkRemotely(categories=[...])` | category slugs | RSS per category; defaults to programming, devops, design, product |
|
|
106
|
+
| `HNWhoIsHiring()` | | Finds the latest monthly thread via Algolia, parses only structured "Company \| Role \| Location" comments; freeform replies are ignored |
|
|
107
|
+
|
|
108
|
+
All adapters accept `timeout=` and `user_agent=`. Public APIs change; every
|
|
109
|
+
adapter parses defensively and skips malformed entries instead of raising.
|
|
110
|
+
|
|
111
|
+
## Failure isolation
|
|
112
|
+
|
|
113
|
+
`fetch_all()` never lets one dead API take down the run:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
results = await fetch_all([Remotive(), Greenhouse(boards=["stripe"])])
|
|
117
|
+
postings = [p for r in results if r.ok for p in r.postings]
|
|
118
|
+
errors = {r.source: r.error for r in results if not r.ok}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Your own adapter joins the party by satisfying the `SourceAdapter` protocol:
|
|
122
|
+
anything with a `name` and an `async fetch() -> list[Posting]`.
|
|
123
|
+
|
|
124
|
+
## Design notes
|
|
125
|
+
|
|
126
|
+
- **httpx + pydantic only.** No framework, no ORM, no surprises in your
|
|
127
|
+
dependency tree.
|
|
128
|
+
- **Fully typed**, ships `py.typed`.
|
|
129
|
+
- **No product logic.** Filtering, enrichment, and storage belong to your
|
|
130
|
+
application; this package only does feed access done well. (JobRadar's
|
|
131
|
+
tech-role filter, LLM enrichment, and database live in
|
|
132
|
+
[its repo](https://github.com/n3ndor/jobradar), on top of this package.)
|
|
133
|
+
- **Tests never touch the network.** Every adapter is tested against mocked
|
|
134
|
+
responses (respx) in CI on Python 3.11, 3.12, and 3.13.
|
|
135
|
+
|
|
136
|
+
## License
|
|
137
|
+
|
|
138
|
+
MIT. Built by [Nandor Nagy](https://github.com/n3ndor) as part of a public
|
|
139
|
+
portfolio; the ingestion layer of [jobradar.nagysolution.com](https://jobradar.nagysolution.com).
|
jobfeeds-0.1.0/README.md
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# 📡 jobfeeds
|
|
4
|
+
|
|
5
|
+
**One typed, async Python interface to six public job boards.**
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
pip install jobfeeds
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+

|
|
12
|
+

|
|
13
|
+

|
|
14
|
+

|
|
15
|
+

|
|
16
|
+
|
|
17
|
+
</div>
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
Fetching job postings should not require writing six API clients. jobfeeds
|
|
22
|
+
gives you Remotive, Arbeitnow, Greenhouse boards, RemoteOK, We Work Remotely,
|
|
23
|
+
and the Hacker News "Who is hiring" thread through one interface, normalized
|
|
24
|
+
into one pydantic model, with failures isolated per source.
|
|
25
|
+
|
|
26
|
+
Extracted from, and used in production by,
|
|
27
|
+
[**JobRadar**](https://jobradar.nagysolution.com), a live job-market
|
|
28
|
+
intelligence dashboard.
|
|
29
|
+
|
|
30
|
+
## Quickstart
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import asyncio
|
|
34
|
+
from jobfeeds import Remotive, Greenhouse, fetch_all
|
|
35
|
+
|
|
36
|
+
async def main():
|
|
37
|
+
# One source
|
|
38
|
+
jobs = await Remotive().fetch() # list[Posting]
|
|
39
|
+
print(jobs[0].company, jobs[0].title, jobs[0].url)
|
|
40
|
+
|
|
41
|
+
# Configured source
|
|
42
|
+
jobs = await Greenhouse(boards=["stripe", "anthropic"]).fetch()
|
|
43
|
+
|
|
44
|
+
# Everything, concurrently, failure-isolated
|
|
45
|
+
results = await fetch_all() # list[SourceResult]
|
|
46
|
+
for r in results:
|
|
47
|
+
print(r.source, len(r.postings) if r.ok else f"FAILED: {r.error}")
|
|
48
|
+
|
|
49
|
+
asyncio.run(main())
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## The model
|
|
53
|
+
|
|
54
|
+
Every source normalizes into the same `Posting`:
|
|
55
|
+
|
|
56
|
+
| Field | Notes |
|
|
57
|
+
| --- | --- |
|
|
58
|
+
| `source` | adapter name (`"remotive"`, `"greenhouse"`, ...) |
|
|
59
|
+
| `external_id` | the source's own id |
|
|
60
|
+
| `company`, `title`, `url` | always present |
|
|
61
|
+
| `location_raw` | as the source states it, unparsed |
|
|
62
|
+
| `posted_at` | `datetime` or `None`, when the source provides one |
|
|
63
|
+
| `raw` | source-specific payload; descriptions truncated to 8,000 chars |
|
|
64
|
+
| `.hash` | cross-source dedupe key (normalized company + title + location) |
|
|
65
|
+
|
|
66
|
+
Boards syndicate each other's postings; `posting_hash()` collapses the
|
|
67
|
+
duplicates without trusting any board-specific id.
|
|
68
|
+
|
|
69
|
+
## Sources and their quirks
|
|
70
|
+
|
|
71
|
+
| Adapter | Config | Worth knowing |
|
|
72
|
+
| --- | --- | --- |
|
|
73
|
+
| `Remotive()` | | Curated remote listings, JSON API |
|
|
74
|
+
| `Arbeitnow(pages=3)` | page count | DACH-market coverage; paginates explicitly because the API's own `links.next` can point at filtered views |
|
|
75
|
+
| `Greenhouse(boards=[...], per_board_limit=40, title_pattern=None)` | board tokens, cap, optional title regex | Direct-employer postings (Stripe, Anthropic, GitLab, ...). No global search endpoint exists, you choose boards. A default starter set is included |
|
|
76
|
+
| `RemoteOK()` | | Their terms require a visible link back to remoteok.com in what you build. The API's first element is a legal notice, already skipped |
|
|
77
|
+
| `WeWorkRemotely(categories=[...])` | category slugs | RSS per category; defaults to programming, devops, design, product |
|
|
78
|
+
| `HNWhoIsHiring()` | | Finds the latest monthly thread via Algolia, parses only structured "Company \| Role \| Location" comments; freeform replies are ignored |
|
|
79
|
+
|
|
80
|
+
All adapters accept `timeout=` and `user_agent=`. Public APIs change; every
|
|
81
|
+
adapter parses defensively and skips malformed entries instead of raising.
|
|
82
|
+
|
|
83
|
+
## Failure isolation
|
|
84
|
+
|
|
85
|
+
`fetch_all()` never lets one dead API take down the run:
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
results = await fetch_all([Remotive(), Greenhouse(boards=["stripe"])])
|
|
89
|
+
postings = [p for r in results if r.ok for p in r.postings]
|
|
90
|
+
errors = {r.source: r.error for r in results if not r.ok}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Your own adapter joins the party by satisfying the `SourceAdapter` protocol:
|
|
94
|
+
anything with a `name` and an `async fetch() -> list[Posting]`.
|
|
95
|
+
|
|
96
|
+
## Design notes
|
|
97
|
+
|
|
98
|
+
- **httpx + pydantic only.** No framework, no ORM, no surprises in your
|
|
99
|
+
dependency tree.
|
|
100
|
+
- **Fully typed**, ships `py.typed`.
|
|
101
|
+
- **No product logic.** Filtering, enrichment, and storage belong to your
|
|
102
|
+
application; this package only does feed access done well. (JobRadar's
|
|
103
|
+
tech-role filter, LLM enrichment, and database live in
|
|
104
|
+
[its repo](https://github.com/n3ndor/jobradar), on top of this package.)
|
|
105
|
+
- **Tests never touch the network.** Every adapter is tested against mocked
|
|
106
|
+
responses (respx) in CI on Python 3.11, 3.12, and 3.13.
|
|
107
|
+
|
|
108
|
+
## License
|
|
109
|
+
|
|
110
|
+
MIT. Built by [Nandor Nagy](https://github.com/n3ndor) as part of a public
|
|
111
|
+
portfolio; the ingestion layer of [jobradar.nagysolution.com](https://jobradar.nagysolution.com).
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "jobfeeds"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "One typed, async Python interface to six public job boards: Remotive, Arbeitnow, Greenhouse, RemoteOK, We Work Remotely, and Hacker News Who is hiring"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
authors = [{ name = "Nandor Nagy", email = "n3ndor@gmail.com" }]
|
|
12
|
+
requires-python = ">=3.11"
|
|
13
|
+
dependencies = [
|
|
14
|
+
"httpx>=0.27",
|
|
15
|
+
"pydantic>=2.5",
|
|
16
|
+
]
|
|
17
|
+
keywords = ["jobs", "job-board", "api", "async", "remotive", "greenhouse", "remoteok", "hackernews"]
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Development Status :: 4 - Beta",
|
|
20
|
+
"Intended Audience :: Developers",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Programming Language :: Python :: 3.13",
|
|
25
|
+
"Typing :: Typed",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/n3ndor/jobfeeds"
|
|
30
|
+
Repository = "https://github.com/n3ndor/jobfeeds"
|
|
31
|
+
Changelog = "https://github.com/n3ndor/jobfeeds/blob/main/CHANGELOG.md"
|
|
32
|
+
"Used in production by" = "https://jobradar.nagysolution.com"
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
dev = [
|
|
36
|
+
"pytest>=8.0",
|
|
37
|
+
"pytest-asyncio>=0.23",
|
|
38
|
+
"respx>=0.21",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[tool.hatch.version]
|
|
42
|
+
path = "src/jobfeeds/__init__.py"
|
|
43
|
+
|
|
44
|
+
[tool.pytest.ini_options]
|
|
45
|
+
testpaths = ["tests"]
|
|
46
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""jobfeeds: one typed, async interface to six public job boards.
|
|
2
|
+
|
|
3
|
+
from jobfeeds import Remotive, Greenhouse, fetch_all
|
|
4
|
+
|
|
5
|
+
jobs = await Remotive().fetch() # list[Posting]
|
|
6
|
+
jobs = await Greenhouse(boards=["stripe"]).fetch() # configurable
|
|
7
|
+
results = await fetch_all() # failure-isolated
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .fetch import fetch_all
|
|
11
|
+
from .models import Posting, SourceResult, posting_hash
|
|
12
|
+
from .sources import (
|
|
13
|
+
Arbeitnow,
|
|
14
|
+
Greenhouse,
|
|
15
|
+
HNWhoIsHiring,
|
|
16
|
+
RemoteOK,
|
|
17
|
+
Remotive,
|
|
18
|
+
SourceAdapter,
|
|
19
|
+
WeWorkRemotely,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
__version__ = "0.1.0"
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"Arbeitnow",
|
|
26
|
+
"Greenhouse",
|
|
27
|
+
"HNWhoIsHiring",
|
|
28
|
+
"Posting",
|
|
29
|
+
"RemoteOK",
|
|
30
|
+
"Remotive",
|
|
31
|
+
"SourceAdapter",
|
|
32
|
+
"SourceResult",
|
|
33
|
+
"WeWorkRemotely",
|
|
34
|
+
"fetch_all",
|
|
35
|
+
"posting_hash",
|
|
36
|
+
"__version__",
|
|
37
|
+
]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Shared text helpers, internal."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
MAX_DESCRIPTION_CHARS = 8000
|
|
8
|
+
|
|
9
|
+
_TAG_RE = re.compile(r"<[^>]+>")
|
|
10
|
+
|
|
11
|
+
# (entity, replacement) pairs applied in order by decode_entities().
#
# "&amp;" is deliberately LAST. If it were decoded first, double-escaped text
# such as "&amp;lt;" would become "&lt;" and then be decoded a second time
# into "<" by a later pair. None of the other replacements produce an "&",
# so their relative order does not matter.
#
# NOTE(review): the entity spellings below were reconstructed from a rendered
# view in which they appeared already decoded - confirm the two apostrophe
# forms and the slash form against the upstream payloads.
_ENTITIES = (
    ("&lt;", "<"),
    ("&gt;", ">"),
    ("&#x27;", "'"),
    ("&#39;", "'"),
    ("&#x2F;", "/"),
    ("&quot;", '"'),
    ("&nbsp;", " "),
    ("&amp;", "&"),
)


def decode_entities(text: str) -> str:
    """Replace the HTML entities listed in ``_ENTITIES`` with plain characters.

    Each pair is applied with ``str.replace`` in table order, so the text is
    decoded exactly one level: ``"&amp;lt;"`` becomes ``"&lt;"``, not ``"<"``.
    Entities that are not in the table are left untouched.

    Args:
        text: Text that may contain HTML entities.

    Returns:
        The text with every listed entity replaced.
    """
    for entity, char in _ENTITIES:
        text = text.replace(entity, char)
    return text
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def strip_html(html: str) -> str:
    """Reduce an HTML fragment to single-spaced plain text.

    Tags are swapped for a space first, then entities are decoded, and
    finally every whitespace run is collapsed to one space and the ends
    are trimmed.
    """
    without_tags = _TAG_RE.sub(" ", html)
    decoded = decode_entities(without_tags)
    single_spaced = re.sub(r"\s+", " ", decoded)
    return single_spaced.strip()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def truncate(text: str | None) -> str:
    """Return at most ``MAX_DESCRIPTION_CHARS`` leading characters of *text*.

    ``None`` and the empty string both yield ``""``.
    """
    if not text:
        return ""
    return text[:MAX_DESCRIPTION_CHARS]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Fetch several sources concurrently with per-source failure isolation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from typing import Sequence
|
|
7
|
+
|
|
8
|
+
from .models import SourceResult
|
|
9
|
+
from .sources import (
|
|
10
|
+
Arbeitnow,
|
|
11
|
+
Greenhouse,
|
|
12
|
+
HNWhoIsHiring,
|
|
13
|
+
RemoteOK,
|
|
14
|
+
Remotive,
|
|
15
|
+
SourceAdapter,
|
|
16
|
+
WeWorkRemotely,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def default_sources() -> list[SourceAdapter]:
    """Return a new instance of each of the six adapters, built with no arguments."""
    adapters: list[SourceAdapter] = [
        Remotive(),
        Arbeitnow(),
        Greenhouse(),
        RemoteOK(),
        WeWorkRemotely(),
        HNWhoIsHiring(),
    ]
    return adapters
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
async def fetch_all(sources: Sequence[SourceAdapter] | None = None) -> list[SourceResult]:
    """Run every adapter's ``fetch()`` concurrently and report each outcome.

    Args:
        sources: Adapters to run. When ``None``, the adapters returned by
            ``default_sources()`` are used.

    Returns:
        One ``SourceResult`` per adapter, in the same order as the adapters.
        A result carries either the fetched postings or an error string of
        the form ``"<ExceptionType>: <message>"``; an ``Exception`` raised by
        one adapter is captured in its own result instead of propagating.
    """
    if sources is None:
        adapters = default_sources()
    else:
        adapters = list(sources)

    async def _isolated(adapter: SourceAdapter) -> SourceResult:
        # Building the success result stays inside the try block so that a
        # failure while constructing it is reported like any fetch failure.
        try:
            fetched = await adapter.fetch()
            return SourceResult(source=adapter.name, postings=fetched)
        except Exception as exc:  # noqa: BLE001 - isolation is the point
            return SourceResult(source=adapter.name, error=f"{type(exc).__name__}: {exc}")

    pending = [_isolated(adapter) for adapter in adapters]
    outcomes = await asyncio.gather(*pending)
    return list(outcomes)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Public data models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import re
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field, HttpUrl
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _normalize(text: str) -> str:
    """Collapse whitespace runs to one space, trim both ends, and lowercase."""
    single_spaced = re.sub(r"\s+", " ", text)
    trimmed = single_spaced.strip()
    return trimmed.lower()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def posting_hash(company: str, title: str, location_raw: str) -> str:
    """Build the cross-source dedupe key for a posting.

    The three fields are each normalized, joined with ``"|"``, and hashed,
    so the same company + title + location yields the same key no matter
    which board the posting came from or what id that board assigned.

    Returns:
        The SHA-256 hex digest of the UTF-8 encoded, joined triple.
    """
    parts = [_normalize(field) for field in (company, title, location_raw)]
    digest = hashlib.sha256("|".join(parts).encode("utf-8"))
    return digest.hexdigest()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Posting(BaseModel):
    """A single job posting in the shape shared by every source.

    The typed fields are the common denominator every adapter can fill;
    anything source-specific travels in `raw` (descriptions there are
    truncated to a bounded size).
    """

    # Name of the adapter that produced the posting.
    source: str
    # The source's own identifier for the posting.
    external_id: str
    company: str
    title: str
    url: HttpUrl
    # Location as the source states it, unparsed; empty when not given.
    location_raw: str = ""
    # Publication time, or None when not set.
    posted_at: datetime | None = None
    # Source-specific payload.
    raw: dict = Field(default_factory=dict)

    @property
    def hash(self) -> str:
        """Dedupe key from `posting_hash()` over company, title and location."""
        return posting_hash(
            company=self.company,
            title=self.title,
            location_raw=self.location_raw,
        )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class SourceResult(BaseModel):
    """What one adapter's fetch produced: postings on success, an error otherwise."""

    # Name of the adapter this result belongs to.
    source: str
    # Fetched postings; defaults to an empty list.
    postings: list[Posting] = Field(default_factory=list)
    # Error description, or None when no error was recorded.
    error: str | None = None

    @property
    def ok(self) -> bool:
        """True when no error was recorded for this source."""
        failed = self.error is not None
        return not failed
|
|
File without changes
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from .arbeitnow import Arbeitnow
|
|
2
|
+
from .base import SourceAdapter
|
|
3
|
+
from .greenhouse import Greenhouse
|
|
4
|
+
from .hn_hiring import HNWhoIsHiring
|
|
5
|
+
from .remoteok import RemoteOK
|
|
6
|
+
from .remotive import Remotive
|
|
7
|
+
from .wwr import WeWorkRemotely
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"Arbeitnow",
|
|
11
|
+
"Greenhouse",
|
|
12
|
+
"HNWhoIsHiring",
|
|
13
|
+
"RemoteOK",
|
|
14
|
+
"Remotive",
|
|
15
|
+
"SourceAdapter",
|
|
16
|
+
"WeWorkRemotely",
|
|
17
|
+
]
|