jobdatapool-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jobdatapool_mcp-0.1.0/LICENSE +21 -0
- jobdatapool_mcp-0.1.0/PKG-INFO +204 -0
- jobdatapool_mcp-0.1.0/README.md +179 -0
- jobdatapool_mcp-0.1.0/pyproject.toml +38 -0
- jobdatapool_mcp-0.1.0/setup.cfg +4 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/__init__.py +3 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/cache.py +69 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/company.py +45 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/config.py +19 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/http.py +18 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/masking.py +65 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/models.py +58 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/search.py +143 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/server.py +200 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp/sources.py +86 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp.egg-info/PKG-INFO +204 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp.egg-info/SOURCES.txt +19 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp.egg-info/dependency_links.txt +1 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp.egg-info/entry_points.txt +2 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp.egg-info/requires.txt +2 -0
- jobdatapool_mcp-0.1.0/src/jobdatapool_mcp.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 JobDataPool
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: jobdatapool-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for JobDataPool job search, source provenance, and analyst workflows
|
|
5
|
+
Author: JobDataPool
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://jobdatapool.com/mcp/
|
|
8
|
+
Project-URL: Documentation, https://jobdatapool.com/mcp/
|
|
9
|
+
Project-URL: API, https://jobdatapool.com/docs/api/
|
|
10
|
+
Keywords: mcp,model-context-protocol,jobs,jobdatapool,job-search
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: mcp<2,>=1.0
|
|
23
|
+
Requires-Dist: requests>=2.32
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# jobdatapool-mcp
|
|
27
|
+
|
|
28
|
+
A local Model Context Protocol server for JobDataPool job listings, source provenance, dataset QA, company briefs, and market scans.
|
|
29
|
+
|
|
30
|
+
This Python package is a parity-oriented port of the Node JobDataPool MCP surface. It exposes the same six core tools, three resources, and three guided prompts described on the JobDataPool MCP page.
|
|
31
|
+
|
|
32
|
+
## Requirements
|
|
33
|
+
|
|
34
|
+
- Python 3.10+
|
|
35
|
+
- An MCP-compatible client that can launch a local stdio server, such as Claude Desktop, Claude Code, Cursor, or Windsurf
|
|
36
|
+
|
|
37
|
+
## Install
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install jobdatapool-mcp
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Run locally
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
jobdatapool-mcp
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
The server uses `https://jobdatapool.com` by default.
|
|
50
|
+
|
|
51
|
+
For local site testing, point it at your local JobDataPool build:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
JOBDATAPOOL_BASE_URL=http://localhost:8888 jobdatapool-mcp
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
The dataset snapshot is cached for 10 minutes by default. Override that with:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
JOBDATAPOOL_CACHE_TTL_SECONDS=60 jobdatapool-mcp
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## MCP client config
|
|
64
|
+
|
|
65
|
+
### Claude Desktop / Claude Code / Cursor / Windsurf
|
|
66
|
+
|
|
67
|
+
After installing from PyPI, use the package command directly:
|
|
68
|
+
|
|
69
|
+
```json
|
|
70
|
+
{
|
|
71
|
+
"mcpServers": {
|
|
72
|
+
"jobdatapool": {
|
|
73
|
+
"command": "jobdatapool-mcp",
|
|
74
|
+
"env": {
|
|
75
|
+
"JOBDATAPOOL_BASE_URL": "https://jobdatapool.com"
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
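For a local checkout before publishing, first install the checkout into your environment (for example `pip install -e .`; the package lives under `src/`, so it is not importable from the repository root otherwise), then run it through Python instead: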
|
|
83
|
+
|
|
84
|
+
```json
|
|
85
|
+
{
|
|
86
|
+
"mcpServers": {
|
|
87
|
+
"jobdatapool": {
|
|
88
|
+
"command": "python",
|
|
89
|
+
"args": ["-m", "jobdatapool_mcp.server"],
|
|
90
|
+
"cwd": "/absolute/path/to/jobdatapool-mcp",
|
|
91
|
+
"env": {
|
|
92
|
+
"JOBDATAPOOL_BASE_URL": "https://jobdatapool.com"
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Tools
|
|
100
|
+
|
|
101
|
+
### `search_jobs`
|
|
102
|
+
|
|
103
|
+
Search the current JobDataPool snapshot by keyword and filters.
|
|
104
|
+
|
|
105
|
+
Useful arguments include:
|
|
106
|
+
|
|
107
|
+
- `query`
|
|
108
|
+
- `limit`
|
|
109
|
+
- `location`
|
|
110
|
+
- `country`
|
|
111
|
+
- `company`
|
|
112
|
+
- `industry`
|
|
113
|
+
- `skill`
|
|
114
|
+
- `seniority`
|
|
115
|
+
- `employment_type`
|
|
116
|
+
- `remote`
|
|
117
|
+
- `posted_within_days`
|
|
118
|
+
- `salary`
|
|
119
|
+
- `sort`
|
|
120
|
+
|
|
121
|
+
Example prompts:
|
|
122
|
+
|
|
123
|
+
```text
|
|
124
|
+
Find remote senior Python security jobs in the US.
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
```text
|
|
128
|
+
Search JobDataPool for data engineering roles in healthcare, limit to 15, and include listing ids.
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
```text
|
|
132
|
+
Find remote product manager jobs with salary signals and summarize the strongest matches.
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### `get_job`
|
|
136
|
+
|
|
137
|
+
Return one listing by JobDataPool id. Raw source and application URLs are masked; results include the JobDataPool redirect handoff URL when an id is available.
|
|
138
|
+
|
|
139
|
+
```text
|
|
140
|
+
Tell me more about listing id 12345.
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### `get_company`
|
|
144
|
+
|
|
145
|
+
Summarize a company footprint from matching listings in the current snapshot.
|
|
146
|
+
|
|
147
|
+
```text
|
|
148
|
+
Brief me on Huntress in the current JobDataPool snapshot.
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
The response includes matching listing count, locations, seniority, industries, skills, examples, and snapshot caveats.
|
|
152
|
+
|
|
153
|
+
### `get_filter_options`
|
|
154
|
+
|
|
155
|
+
Show high-count facet values for common filters.
|
|
156
|
+
|
|
157
|
+
```text
|
|
158
|
+
What skills, countries, locations, industries, and companies are common in this dataset?
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### `find_similar_jobs`
|
|
162
|
+
|
|
163
|
+
Find listings similar to a known JobDataPool listing id using title, skill, company, industry, and summary overlap.
|
|
164
|
+
|
|
165
|
+
```text
|
|
166
|
+
Find jobs similar to listing id 12345.
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### `get_sources`
|
|
170
|
+
|
|
171
|
+
Return the JobDataPool source catalog, contract pointers, and local MCP cache status.
|
|
172
|
+
|
|
173
|
+
```text
|
|
174
|
+
What snapshot is this MCP reading, and how many rows are cached?
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Resources
|
|
178
|
+
|
|
179
|
+
- `jobdatapool://source-catalog` — live `/v1/sources` catalog plus cache status
|
|
180
|
+
- `jobdatapool://analyst-context` — dataset scope, cache behavior, provenance guidance, caveats, and URL masking policy
|
|
181
|
+
- `jobdatapool://contracts` — OpenAPI, schema, dataset, v1 endpoint, tool, resource, and prompt pointers
|
|
182
|
+
|
|
183
|
+
## Prompts
|
|
184
|
+
|
|
185
|
+
- `jobdatapool_market_scan(topic, location, remote)` — role, skill, industry, or hiring-theme scan
|
|
186
|
+
- `jobdatapool_company_brief(company)` — company footprint summary workflow
|
|
187
|
+
- `jobdatapool_dataset_qa()` — source/catalog health and facet coverage workflow
|
|
188
|
+
|
|
189
|
+
## URL masking behavior
|
|
190
|
+
|
|
191
|
+
The server does not return raw source or application URLs. It removes common origin URL fields and replaces embedded external URLs in scraped text with `[masked external URL]`. When a JobDataPool id is present, outputs include:
|
|
192
|
+
|
|
193
|
+
```text
|
|
194
|
+
https://jobdatapool.com/jobrd?id=<id>
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Publishing
|
|
198
|
+
|
|
199
|
+
This repository is wired for PyPI Trusted Publishing through GitHub Actions. Configure a PyPI Trusted Publisher for the repository and the `release` environment, then push a tag:
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
git tag v0.1.0
|
|
203
|
+
git push origin v0.1.0
|
|
204
|
+
```
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# jobdatapool-mcp
|
|
2
|
+
|
|
3
|
+
A local Model Context Protocol server for JobDataPool job listings, source provenance, dataset QA, company briefs, and market scans.
|
|
4
|
+
|
|
5
|
+
This Python package is a parity-oriented port of the Node JobDataPool MCP surface. It exposes the same six core tools, three resources, and three guided prompts described on the JobDataPool MCP page.
|
|
6
|
+
|
|
7
|
+
## Requirements
|
|
8
|
+
|
|
9
|
+
- Python 3.10+
|
|
10
|
+
- An MCP-compatible client that can launch a local stdio server, such as Claude Desktop, Claude Code, Cursor, or Windsurf
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install jobdatapool-mcp
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Run locally
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
jobdatapool-mcp
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
The server uses `https://jobdatapool.com` by default.
|
|
25
|
+
|
|
26
|
+
For local site testing, point it at your local JobDataPool build:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
JOBDATAPOOL_BASE_URL=http://localhost:8888 jobdatapool-mcp
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
The dataset snapshot is cached for 10 minutes by default. Override that with:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
JOBDATAPOOL_CACHE_TTL_SECONDS=60 jobdatapool-mcp
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## MCP client config
|
|
39
|
+
|
|
40
|
+
### Claude Desktop / Claude Code / Cursor / Windsurf
|
|
41
|
+
|
|
42
|
+
After installing from PyPI, use the package command directly:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"mcpServers": {
|
|
47
|
+
"jobdatapool": {
|
|
48
|
+
"command": "jobdatapool-mcp",
|
|
49
|
+
"env": {
|
|
50
|
+
"JOBDATAPOOL_BASE_URL": "https://jobdatapool.com"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
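For a local checkout before publishing, first install the checkout into your environment (for example `pip install -e .`; the package lives under `src/`, so it is not importable from the repository root otherwise), then run it through Python instead: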
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"mcpServers": {
|
|
62
|
+
"jobdatapool": {
|
|
63
|
+
"command": "python",
|
|
64
|
+
"args": ["-m", "jobdatapool_mcp.server"],
|
|
65
|
+
"cwd": "/absolute/path/to/jobdatapool-mcp",
|
|
66
|
+
"env": {
|
|
67
|
+
"JOBDATAPOOL_BASE_URL": "https://jobdatapool.com"
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Tools
|
|
75
|
+
|
|
76
|
+
### `search_jobs`
|
|
77
|
+
|
|
78
|
+
Search the current JobDataPool snapshot by keyword and filters.
|
|
79
|
+
|
|
80
|
+
Useful arguments include:
|
|
81
|
+
|
|
82
|
+
- `query`
|
|
83
|
+
- `limit`
|
|
84
|
+
- `location`
|
|
85
|
+
- `country`
|
|
86
|
+
- `company`
|
|
87
|
+
- `industry`
|
|
88
|
+
- `skill`
|
|
89
|
+
- `seniority`
|
|
90
|
+
- `employment_type`
|
|
91
|
+
- `remote`
|
|
92
|
+
- `posted_within_days`
|
|
93
|
+
- `salary`
|
|
94
|
+
- `sort`
|
|
95
|
+
|
|
96
|
+
Example prompts:
|
|
97
|
+
|
|
98
|
+
```text
|
|
99
|
+
Find remote senior Python security jobs in the US.
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
```text
|
|
103
|
+
Search JobDataPool for data engineering roles in healthcare, limit to 15, and include listing ids.
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
```text
|
|
107
|
+
Find remote product manager jobs with salary signals and summarize the strongest matches.
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### `get_job`
|
|
111
|
+
|
|
112
|
+
Return one listing by JobDataPool id. Raw source and application URLs are masked; results include the JobDataPool redirect handoff URL when an id is available.
|
|
113
|
+
|
|
114
|
+
```text
|
|
115
|
+
Tell me more about listing id 12345.
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### `get_company`
|
|
119
|
+
|
|
120
|
+
Summarize a company footprint from matching listings in the current snapshot.
|
|
121
|
+
|
|
122
|
+
```text
|
|
123
|
+
Brief me on Huntress in the current JobDataPool snapshot.
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
The response includes matching listing count, locations, seniority, industries, skills, examples, and snapshot caveats.
|
|
127
|
+
|
|
128
|
+
### `get_filter_options`
|
|
129
|
+
|
|
130
|
+
Show high-count facet values for common filters.
|
|
131
|
+
|
|
132
|
+
```text
|
|
133
|
+
What skills, countries, locations, industries, and companies are common in this dataset?
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### `find_similar_jobs`
|
|
137
|
+
|
|
138
|
+
Find listings similar to a known JobDataPool listing id using title, skill, company, industry, and summary overlap.
|
|
139
|
+
|
|
140
|
+
```text
|
|
141
|
+
Find jobs similar to listing id 12345.
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### `get_sources`
|
|
145
|
+
|
|
146
|
+
Return the JobDataPool source catalog, contract pointers, and local MCP cache status.
|
|
147
|
+
|
|
148
|
+
```text
|
|
149
|
+
What snapshot is this MCP reading, and how many rows are cached?
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Resources
|
|
153
|
+
|
|
154
|
+
- `jobdatapool://source-catalog` — live `/v1/sources` catalog plus cache status
|
|
155
|
+
- `jobdatapool://analyst-context` — dataset scope, cache behavior, provenance guidance, caveats, and URL masking policy
|
|
156
|
+
- `jobdatapool://contracts` — OpenAPI, schema, dataset, v1 endpoint, tool, resource, and prompt pointers
|
|
157
|
+
|
|
158
|
+
## Prompts
|
|
159
|
+
|
|
160
|
+
- `jobdatapool_market_scan(topic, location, remote)` — role, skill, industry, or hiring-theme scan
|
|
161
|
+
- `jobdatapool_company_brief(company)` — company footprint summary workflow
|
|
162
|
+
- `jobdatapool_dataset_qa()` — source/catalog health and facet coverage workflow
|
|
163
|
+
|
|
164
|
+
## URL masking behavior
|
|
165
|
+
|
|
166
|
+
The server does not return raw source or application URLs. It removes common origin URL fields and replaces embedded external URLs in scraped text with `[masked external URL]`. When a JobDataPool id is present, outputs include:
|
|
167
|
+
|
|
168
|
+
```text
|
|
169
|
+
https://jobdatapool.com/jobrd?id=<id>
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Publishing
|
|
173
|
+
|
|
174
|
+
This repository is wired for PyPI Trusted Publishing through GitHub Actions. Configure a PyPI Trusted Publisher for the repository and the `release` environment, then push a tag:
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
git tag v0.1.0
|
|
178
|
+
git push origin v0.1.0
|
|
179
|
+
```
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "jobdatapool-mcp"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "MCP server for JobDataPool job search, source provenance, and analyst workflows"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [{ name = "JobDataPool" }]
|
|
9
|
+
keywords = ["mcp", "model-context-protocol", "jobs", "jobdatapool", "job-search"]
|
|
10
|
+
dependencies = [
|
|
11
|
+
"mcp>=1.0,<2",
|
|
12
|
+
"requests>=2.32"
|
|
13
|
+
]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://jobdatapool.com/mcp/"
|
|
27
|
+
Documentation = "https://jobdatapool.com/mcp/"
|
|
28
|
+
API = "https://jobdatapool.com/docs/api/"
|
|
29
|
+
|
|
30
|
+
[project.scripts]
|
|
31
|
+
jobdatapool-mcp = "jobdatapool_mcp.server:main"
|
|
32
|
+
|
|
33
|
+
[build-system]
|
|
34
|
+
requires = ["setuptools>=69", "wheel"]
|
|
35
|
+
build-backend = "setuptools.build_meta"
|
|
36
|
+
|
|
37
|
+
[tool.setuptools.packages.find]
|
|
38
|
+
where = ["src"]
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from .config import cache_ttl_seconds
|
|
8
|
+
from .http import get_json
|
|
9
|
+
from .sources import candidate_dataset_urls, get_source_catalog
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
|
|
13
|
+
class DatasetCache:
|
|
14
|
+
rows: list[dict[str, Any]] = field(default_factory=list)
|
|
15
|
+
source_catalog: Any | None = None
|
|
16
|
+
dataset_url: str | None = None
|
|
17
|
+
loaded_at: float = 0.0
|
|
18
|
+
error: str | None = None
|
|
19
|
+
|
|
20
|
+
def fresh(self) -> bool:
|
|
21
|
+
ttl = cache_ttl_seconds()
|
|
22
|
+
return bool(self.rows) and ttl > 0 and (time.time() - self.loaded_at) < ttl
|
|
23
|
+
|
|
24
|
+
def status(self) -> dict[str, Any]:
|
|
25
|
+
return {
|
|
26
|
+
"loaded_rows": len(self.rows),
|
|
27
|
+
"dataset_url": self.dataset_url,
|
|
28
|
+
"loaded_at": self.loaded_at or None,
|
|
29
|
+
"ttl_seconds": cache_ttl_seconds(),
|
|
30
|
+
"fresh": self.fresh(),
|
|
31
|
+
"last_error": self.error,
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Module-level cache instance that load_dataset() reads and updates in place.
_CACHE = DatasetCache()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _extract_rows(payload: Any) -> list[dict[str, Any]]:
|
|
39
|
+
if isinstance(payload, list):
|
|
40
|
+
rows = payload
|
|
41
|
+
elif isinstance(payload, dict):
|
|
42
|
+
rows = payload.get("jobs") or payload.get("results") or payload.get("data") or payload.get("rows") or payload.get("listings") or []
|
|
43
|
+
else:
|
|
44
|
+
rows = []
|
|
45
|
+
return [row for row in rows if isinstance(row, dict)]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def load_dataset(force: bool = False) -> DatasetCache:
    """Return the module cache, reloading the dataset when needed.

    The source catalog is refreshed first (best effort), then each candidate
    dataset URL is tried in order until one yields rows.

    Args:
        force: Reload even when the cached rows are still fresh.

    Returns:
        The shared ``DatasetCache``. On success its rows, URL and timestamp
        are replaced and ``error`` is cleared. On failure the previously
        cached rows are left untouched and ``error`` describes what went
        wrong during this call.
    """
    if _CACHE.fresh() and not force:
        return _CACHE

    # Failures are tracked per call so that a message left behind by an
    # earlier call is never reported as the cause of the current one.
    catalog_error: str | None = None
    try:
        _CACHE.source_catalog = get_source_catalog()
    except Exception as exc:  # noqa: BLE001
        # Best effort: keep whatever catalog was cached before and carry on.
        catalog_error = f"source catalog unavailable: {exc}"
        _CACHE.error = catalog_error

    last_error: str | None = None
    for url in candidate_dataset_urls(_CACHE.source_catalog):
        try:
            rows = _extract_rows(get_json(url, timeout=30))
        except Exception as exc:  # noqa: BLE001
            last_error = f"{url}: {exc}"
            continue
        if rows:
            _CACHE.rows = rows
            _CACHE.dataset_url = url
            _CACHE.loaded_at = time.time()
            _CACHE.error = None
            return _CACHE

    _CACHE.error = last_error or catalog_error or "No dataset rows loaded."
    return _CACHE
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from .models import as_list, get_first, text_value
|
|
7
|
+
from .masking import public_job
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def company_brief(rows: list[dict[str, Any]], company: str, limit_examples: int = 5) -> dict[str, Any]:
    """Summarize the listings whose company name contains *company*.

    Matching is a case-insensitive substring test on each row's ``company``
    value. Surrounding whitespace in the query is ignored, and a blank query
    matches nothing (a raw substring test would match every row).

    Args:
        rows: Listing rows to scan.
        company: Company name, or part of one, to look for.
        limit_examples: Number of example listings to include; clamped to
            the range 1..20.

    Returns:
        A dict with the original query, the match count, frequency tables
        for locations, seniority, industries and skills, a list of masked
        example listings, and a snapshot caveat.
    """
    needle = company.strip().lower()
    matches = (
        [row for row in rows if needle in text_value(row.get("company")).lower()]
        if needle
        else []
    )

    locations: Counter[str] = Counter()
    seniority: Counter[str] = Counter()
    industries: Counter[str] = Counter()
    skills: Counter[str] = Counter()
    for row in matches:
        facets = (
            (row.get("location"), locations),
            (row.get("seniority"), seniority),
            (row.get("industry") or row.get("industries"), industries),
            (row.get("skills"), skills),
        )
        for value, counter in facets:
            for item in as_list(value):
                text = text_value(item)
                if text:
                    counter[text] += 1

    example_count = max(1, min(limit_examples, 20))
    examples = [
        public_job({
            # "uuid" is included so the id lookup accepts the same keys the
            # masking layer uses when it builds the redirect URL.
            # NOTE(review): assumes get_first returns the first usable value
            # among the given keys; confirm against models.get_first.
            "id": get_first(row, "id", "job_id", "listing_id", "jobdatapool_id", "uuid"),
            "title": row.get("title"),
            "company": row.get("company"),
            "location": row.get("location"),
        })
        for row in matches[:example_count]
    ]

    return {
        "company_query": company,
        "matching_listings": len(matches),
        "locations": dict(locations.most_common(15)),
        "seniority": dict(seniority.most_common(15)),
        "industries": dict(industries.most_common(15)),
        "skills": dict(skills.most_common(20)),
        "examples": examples,
        "caveat": "This brief is derived from the current JobDataPool snapshot loaded by the MCP cache.",
    }
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
# Site origin used when JOBDATAPOOL_BASE_URL is not set in the environment.
DEFAULT_BASE_URL = "https://jobdatapool.com"
|
|
6
|
+
# Fallback cache lifetime in seconds (10 minutes), used when
# JOBDATAPOOL_CACHE_TTL_SECONDS is unset or is not a valid integer.
DEFAULT_CACHE_TTL_SECONDS = 600
|
|
7
|
+
# Value for the User-Agent request header identifying this client.
USER_AGENT = "jobdatapool-mcp/0.1.0 (+https://jobdatapool.com/mcp/)"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def base_url() -> str:
    """Return the JobDataPool site origin without a trailing slash.

    The value comes from the ``JOBDATAPOOL_BASE_URL`` environment variable.
    When the variable is unset, empty, or only whitespace, the built-in
    default is used instead, so callers never receive an empty origin and
    build relative URLs from it.

    Returns:
        The origin with surrounding whitespace and trailing ``/`` removed.
    """
    configured = os.getenv("JOBDATAPOOL_BASE_URL", "").strip()
    return (configured or DEFAULT_BASE_URL).rstrip("/")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def cache_ttl_seconds() -> int:
    """Return the dataset cache lifetime in whole seconds.

    Reads ``JOBDATAPOOL_CACHE_TTL_SECONDS`` from the environment. Negative
    values are clamped to zero, and a value that is not an integer falls
    back to the default lifetime.
    """
    configured = os.getenv("JOBDATAPOOL_CACHE_TTL_SECONDS", str(DEFAULT_CACHE_TTL_SECONDS))
    try:
        seconds = int(configured)
    except ValueError:
        return DEFAULT_CACHE_TTL_SECONDS
    return seconds if seconds > 0 else 0
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
from .config import USER_AGENT
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_json(url: str, *, params: dict[str, Any] | None = None, timeout: int = 20) -> Any:
    """Fetch *url* with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address to request.
        params: Optional query-string parameters.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        Whatever ``response.json()`` decodes from the body.

    Raises:
        requests.HTTPError: When ``raise_for_status()`` rejects the response.
    """
    request_headers = {"Accept": "application/json", "User-Agent": USER_AGENT}
    reply = requests.get(url, params=params, headers=request_headers, timeout=timeout)
    reply.raise_for_status()
    return reply.json()
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from .config import base_url
|
|
7
|
+
|
|
8
|
+
# Matches an http:// or https:// URL (scheme matched case-insensitively) up to
# the first whitespace character or one of ) ] } > " ' so that closing
# punctuation around a link is left in the surrounding text.
_URL_RE = re.compile(r"https?://[^\s)\]}>\"']+", re.IGNORECASE)
|
|
9
|
+
|
|
10
|
+
# Lower-cased field names that carry a raw origin or application link and are
# therefore dropped from listing payloads.
RAW_URL_KEYS = {
    "application_url", "apply_url",
    "canonical_url", "external_url",
    "job_url", "original_url",
    "source_url", "url",
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def job_id(job: dict[str, Any]) -> str | None:
|
|
23
|
+
for key in ("id", "job_id", "listing_id", "jobdatapool_id", "uuid"):
|
|
24
|
+
value = job.get(key)
|
|
25
|
+
if value is not None and str(value).strip():
|
|
26
|
+
return str(value).strip()
|
|
27
|
+
return None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def redirect_url(job_or_id: dict[str, Any] | str | int | None) -> str | None:
|
|
31
|
+
if isinstance(job_or_id, dict):
|
|
32
|
+
jid = job_id(job_or_id)
|
|
33
|
+
elif job_or_id is None:
|
|
34
|
+
jid = None
|
|
35
|
+
else:
|
|
36
|
+
jid = str(job_or_id).strip()
|
|
37
|
+
return f"{base_url()}/jobrd?id={jid}" if jid else None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def sanitize_text(value: Any) -> Any:
    """Replace every URL found in a string with ``[masked external URL]``.

    Values that are not strings are handed back unchanged.
    """
    if isinstance(value, str):
        return _URL_RE.sub("[masked external URL]", value)
    return value
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _scrub_nested(value: Any) -> Any:
    """Mask URLs in strings and drop URL-named keys at any nesting depth."""
    if isinstance(value, str):
        return sanitize_text(value)
    if isinstance(value, list):
        return [_scrub_nested(item) for item in value]
    if isinstance(value, dict):
        return {
            key: _scrub_nested(item)
            for key, item in value.items()
            if not str(key).lower().endswith("url")
        }
    return value


def public_job(job: dict[str, Any]) -> dict[str, Any]:
    """Return a listing payload with raw origin/application URLs removed.

    Top-level keys are dropped when their lower-cased name is listed in
    ``RAW_URL_KEYS`` or ends in ``_url``, or when the original name ends in
    ``Url``/``URL`` (camelCase fields such as ``applyUrl``). Remaining values
    are scrubbed recursively: URLs inside strings are masked, and keys ending
    in ``url`` are removed from nested dicts, including dicts inside lists.

    Args:
        job: Raw listing row.

    Returns:
        A new dict. When the row has an identifier, ``id`` is filled in if
        absent and ``jobdatapool_url`` holds the redirect link for it.
    """
    output: dict[str, Any] = {}
    for key, value in job.items():
        name = str(key)
        lowered = name.lower()
        # The camelCase suffix must be tested on the original spelling; the
        # lower-cased name can never end in "Url".
        if lowered in RAW_URL_KEYS or lowered.endswith("_url") or name.endswith(("Url", "URL")):
            continue
        output[key] = _scrub_nested(value)

    jid = job_id(job)
    if jid:
        output.setdefault("id", jid)
        output["jobdatapool_url"] = redirect_url(jid)
    return output
|