recruitertriage 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recruitertriage-0.1.0/.gitignore +12 -0
- recruitertriage-0.1.0/LICENSE +21 -0
- recruitertriage-0.1.0/PKG-INFO +133 -0
- recruitertriage-0.1.0/README.md +96 -0
- recruitertriage-0.1.0/examples/demo.py +72 -0
- recruitertriage-0.1.0/pyproject.toml +51 -0
- recruitertriage-0.1.0/space/app.py +118 -0
- recruitertriage-0.1.0/space/requirements.txt +5 -0
- recruitertriage-0.1.0/src/recruitertriage/__init__.py +41 -0
- recruitertriage-0.1.0/src/recruitertriage/core.py +253 -0
- recruitertriage-0.1.0/src/recruitertriage/smollm.py +76 -0
- recruitertriage-0.1.0/tests/test_core.py +141 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mukunda Rao Katta
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: recruitertriage
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Triage recruiter outreach with a small (<1B) language model. Built for the HuggingFace Build Small Hackathon.
|
|
5
|
+
Project-URL: Homepage, https://github.com/MukundaKatta/recruitertriage
|
|
6
|
+
Project-URL: Source, https://github.com/MukundaKatta/recruitertriage
|
|
7
|
+
Project-URL: Issues, https://github.com/MukundaKatta/recruitertriage/issues
|
|
8
|
+
Author-email: Mukunda Rao Katta <mukunda.vjcs6@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai,huggingface,inbox,llm,recruiter,small-models,smollm,triage
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
27
|
+
Provides-Extra: smollm
|
|
28
|
+
Requires-Dist: accelerate>=0.30; extra == 'smollm'
|
|
29
|
+
Requires-Dist: torch>=2.2; extra == 'smollm'
|
|
30
|
+
Requires-Dist: transformers>=4.44; extra == 'smollm'
|
|
31
|
+
Provides-Extra: space
|
|
32
|
+
Requires-Dist: accelerate>=0.30; extra == 'space'
|
|
33
|
+
Requires-Dist: gradio>=4.40; extra == 'space'
|
|
34
|
+
Requires-Dist: torch>=2.2; extra == 'space'
|
|
35
|
+
Requires-Dist: transformers>=4.44; extra == 'space'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# recruitertriage
|
|
39
|
+
|
|
40
|
+
Triage recruiter outreach with a small (<1B) language model. Built
|
|
41
|
+
for the [HuggingFace Build Small Hackathon][bs].
|
|
42
|
+
|
|
43
|
+
[bs]: https://huggingface.co/Build-Small-Hackathon
|
|
44
|
+
|
|
45
|
+
The default backend is
|
|
46
|
+
[SmolLM2-360M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct).
|
|
47
|
+
It fits on a free HF Space CPU and still gets useful triage signal.
|
|
48
|
+
You can swap in any callable LM (a fine-tune, Qwen-0.5B, a
|
|
49
|
+
heuristic, anything that maps `prompt -> string`).
|
|
50
|
+
|
|
51
|
+
## Install
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install recruitertriage # core only (zero heavy deps)
|
|
55
|
+
pip install "recruitertriage[smollm]" # + SmolLM2 / transformers
|
|
56
|
+
pip install "recruitertriage[space]" # + Gradio for the Space UI
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Python 3.10+.
|
|
60
|
+
|
|
61
|
+
## What it does
|
|
62
|
+
|
|
63
|
+
Each piece of recruiter outreach gets bucketed into one of five labels:
|
|
64
|
+
|
|
65
|
+
| label | meaning |
|
|
66
|
+
|--------------|------------------------------------------------------|
|
|
67
|
+
| `interview` | looks like a real fit, schedule a call |
|
|
68
|
+
| `needs_info` | interesting but missing role/comp/level |
|
|
69
|
+
| `reject` | clearly off (wrong stack, wrong level) |
|
|
70
|
+
| `spam` | not a real role (vendor sales, lead-gen, scam) |
|
|
71
|
+
| `unsure` | low confidence — falls through to a human |
|
|
72
|
+
|
|
73
|
+
Each `Decision` carries a label, a confidence (0..1), a one-line
|
|
74
|
+
reason, an optional suggested reply, and the raw signals the model
|
|
75
|
+
saw.
|
|
76
|
+
|
|
77
|
+
## Usage
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from recruitertriage import triage, HeuristicLLM
|
|
81
|
+
|
|
82
|
+
# Zero-dep fallback (good for tests/CI):
|
|
83
|
+
d = triage(
|
|
84
|
+
subject="Staff ML Engineer @ AcmeAI",
|
|
85
|
+
body="Hi - we're hiring a Staff ML Engineer. Comp 250-320k...",
|
|
86
|
+
llm=HeuristicLLM(),
|
|
87
|
+
)
|
|
88
|
+
print(d.label, d.confidence, d.suggested_reply)
|
|
89
|
+
|
|
90
|
+
# Real small-model backend (requires the `smollm` extra):
|
|
91
|
+
from recruitertriage.smollm import make_smollm
|
|
92
|
+
llm = make_smollm("HuggingFaceTB/SmolLM2-360M-Instruct")
|
|
93
|
+
d = triage(subject="...", body="...", llm=llm)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
`triage()` only cares that `llm` is `Callable[[str], str]`, so any
|
|
97
|
+
small instruction-tuned LM works. The core parser is tolerant of
|
|
98
|
+
small-model JSON quirks (code fences, prose around the object, etc.).
|
|
99
|
+
|
|
100
|
+
## HuggingFace Space
|
|
101
|
+
|
|
102
|
+
The `space/` directory is the deployable Gradio app:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
pip install "recruitertriage[space]"
|
|
106
|
+
python space/app.py
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
To publish:
|
|
110
|
+
|
|
111
|
+
1. `hf login`
|
|
112
|
+
2. Create a new Gradio Space (CPU basic is enough)
|
|
113
|
+
3. Push the contents of `space/` as the Space root
|
|
114
|
+
|
|
115
|
+
## Demo
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
python examples/demo.py # uses HeuristicLLM
|
|
119
|
+
python examples/demo.py --smollm # uses SmolLM2-360M-Instruct
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Companion libraries
|
|
123
|
+
|
|
124
|
+
`recruitertriage` slots into the @mukundakatta agent-stack:
|
|
125
|
+
|
|
126
|
+
- [agentleash](https://github.com/MukundaKatta/agentleash) — USD/call budget cap + tool-arg gate
|
|
127
|
+
- [birddog](https://github.com/MukundaKatta/birddog) — audited Bright Data egress for scraping agents
|
|
128
|
+
- [agentvet](https://github.com/MukundaKatta/agentvet) — tool-arg validation with retry hints
|
|
129
|
+
- [agentsnap](https://github.com/MukundaKatta/agentsnap) — snapshot tests for agent traces
|
|
130
|
+
|
|
131
|
+
## License
|
|
132
|
+
|
|
133
|
+
MIT
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# recruitertriage
|
|
2
|
+
|
|
3
|
+
Triage recruiter outreach with a small (<1B) language model. Built
|
|
4
|
+
for the [HuggingFace Build Small Hackathon][bs].
|
|
5
|
+
|
|
6
|
+
[bs]: https://huggingface.co/Build-Small-Hackathon
|
|
7
|
+
|
|
8
|
+
The default backend is
|
|
9
|
+
[SmolLM2-360M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct).
|
|
10
|
+
It fits on a free HF Space CPU and still gets useful triage signal.
|
|
11
|
+
You can swap in any callable LM (a fine-tune, Qwen-0.5B, a
|
|
12
|
+
heuristic, anything that maps `prompt -> string`).
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install recruitertriage # core only (zero heavy deps)
|
|
18
|
+
pip install "recruitertriage[smollm]" # + SmolLM2 / transformers
|
|
19
|
+
pip install "recruitertriage[space]" # + Gradio for the Space UI
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Python 3.10+.
|
|
23
|
+
|
|
24
|
+
## What it does
|
|
25
|
+
|
|
26
|
+
Each piece of recruiter outreach gets bucketed into one of five labels:
|
|
27
|
+
|
|
28
|
+
| label | meaning |
|
|
29
|
+
|--------------|------------------------------------------------------|
|
|
30
|
+
| `interview` | looks like a real fit, schedule a call |
|
|
31
|
+
| `needs_info` | interesting but missing role/comp/level |
|
|
32
|
+
| `reject` | clearly off (wrong stack, wrong level) |
|
|
33
|
+
| `spam` | not a real role (vendor sales, lead-gen, scam) |
|
|
34
|
+
| `unsure` | low confidence — falls through to a human |
|
|
35
|
+
|
|
36
|
+
Each `Decision` carries a label, a confidence (0..1), a one-line
|
|
37
|
+
reason, an optional suggested reply, and the raw signals the model
|
|
38
|
+
saw.
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from recruitertriage import triage, HeuristicLLM
|
|
44
|
+
|
|
45
|
+
# Zero-dep fallback (good for tests/CI):
|
|
46
|
+
d = triage(
|
|
47
|
+
subject="Staff ML Engineer @ AcmeAI",
|
|
48
|
+
body="Hi - we're hiring a Staff ML Engineer. Comp 250-320k...",
|
|
49
|
+
llm=HeuristicLLM(),
|
|
50
|
+
)
|
|
51
|
+
print(d.label, d.confidence, d.suggested_reply)
|
|
52
|
+
|
|
53
|
+
# Real small-model backend (requires the `smollm` extra):
|
|
54
|
+
from recruitertriage.smollm import make_smollm
|
|
55
|
+
llm = make_smollm("HuggingFaceTB/SmolLM2-360M-Instruct")
|
|
56
|
+
d = triage(subject="...", body="...", llm=llm)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
`triage()` only cares that `llm` is `Callable[[str], str]`, so any
|
|
60
|
+
small instruction-tuned LM works. The core parser is tolerant of
|
|
61
|
+
small-model JSON quirks (code fences, prose around the object, etc.).
|
|
62
|
+
|
|
63
|
+
## HuggingFace Space
|
|
64
|
+
|
|
65
|
+
The `space/` directory is the deployable Gradio app:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pip install "recruitertriage[space]"
|
|
69
|
+
python space/app.py
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
To publish:
|
|
73
|
+
|
|
74
|
+
1. `hf login`
|
|
75
|
+
2. Create a new Gradio Space (CPU basic is enough)
|
|
76
|
+
3. Push the contents of `space/` as the Space root
|
|
77
|
+
|
|
78
|
+
## Demo
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
python examples/demo.py # uses HeuristicLLM
|
|
82
|
+
python examples/demo.py --smollm # uses SmolLM2-360M-Instruct
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Companion libraries
|
|
86
|
+
|
|
87
|
+
`recruitertriage` slots into the @mukundakatta agent-stack:
|
|
88
|
+
|
|
89
|
+
- [agentleash](https://github.com/MukundaKatta/agentleash) — USD/call budget cap + tool-arg gate
|
|
90
|
+
- [birddog](https://github.com/MukundaKatta/birddog) — audited Bright Data egress for scraping agents
|
|
91
|
+
- [agentvet](https://github.com/MukundaKatta/agentvet) — tool-arg validation with retry hints
|
|
92
|
+
- [agentsnap](https://github.com/MukundaKatta/agentsnap) — snapshot tests for agent traces
|
|
93
|
+
|
|
94
|
+
## License
|
|
95
|
+
|
|
96
|
+
MIT
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Run recruitertriage against a handful of canned emails.
|
|
2
|
+
|
|
3
|
+
By default uses the zero-dep HeuristicLLM so this script works in CI
|
|
4
|
+
and on machines without transformers/torch installed.
|
|
5
|
+
|
|
6
|
+
Pass --smollm to use SmolLM2-360M-Instruct (requires
|
|
7
|
+
`pip install "recruitertriage[smollm]"`).
|
|
8
|
+
|
|
9
|
+
python examples/demo.py
|
|
10
|
+
python examples/demo.py --smollm
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import json
|
|
17
|
+
|
|
18
|
+
from recruitertriage import HeuristicLLM, triage
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
SAMPLES = [
|
|
22
|
+
(
|
|
23
|
+
"Staff ML Engineer @ AcmeAI",
|
|
24
|
+
"Hi Mukunda - we're hiring a Staff ML Engineer to lead agents. "
|
|
25
|
+
"Comp band 250-320k base + equity. Remote PT/PST friendly. "
|
|
26
|
+
"Are you open to a 15-min chat next week?",
|
|
27
|
+
),
|
|
28
|
+
(
|
|
29
|
+
"Java backend role",
|
|
30
|
+
"Hello, we are sourcing for a Java backend developer at a "
|
|
31
|
+
"Fortune 100 bank. Onsite Charlotte. Interested?",
|
|
32
|
+
),
|
|
33
|
+
(
|
|
34
|
+
"Lead gen partnership",
|
|
35
|
+
"Hi, our agency can send you 50 verified candidate emails per week. "
|
|
36
|
+
"Buy emails, save time. Reply YES for pricing.",
|
|
37
|
+
),
|
|
38
|
+
(
|
|
39
|
+
"Quick chat",
|
|
40
|
+
"Hey - saw your GitHub. Could you send your resume if you're "
|
|
41
|
+
"open to a chat? Let me know if interested!",
|
|
42
|
+
),
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def main() -> None:
    """Triage every canned sample and print a short report for each one.

    The ``--smollm`` flag swaps the keyword heuristic for the SmolLM2
    backend; that backend is imported only when requested so the default
    run needs nothing beyond the core package.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smollm",
        action="store_true",
        help="Use SmolLM2-360M-Instruct (heavy import).",
    )
    use_smollm = parser.parse_args().smollm

    if not use_smollm:
        llm = HeuristicLLM()
        backend = "HeuristicLLM (zero-dep fallback)"
    else:
        # Deferred import: pulls in transformers/torch via the smollm extra.
        from recruitertriage.smollm import make_smollm

        llm = make_smollm()
        backend = "SmolLM2-360M-Instruct"

    print(f"backend: {backend}\n")

    for index, (subj, body) in enumerate(SAMPLES, start=1):
        decision = triage(subject=subj, body=body, llm=llm)
        print(f"[{index}] {subj}")
        print(
            f" label={decision.label.value:<11} "
            f"conf={decision.confidence:.2f} "
            f"reason={decision.reason}"
        )
        if decision.suggested_reply:
            print(f" reply: {decision.suggested_reply}")
        print()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
if __name__ == "__main__":
|
|
72
|
+
main()
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.24"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "recruitertriage"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Triage recruiter outreach with a small (<1B) language model. Built for the HuggingFace Build Small Hackathon."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Mukunda Rao Katta", email = "mukunda.vjcs6@gmail.com" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"ai",
|
|
15
|
+
"llm",
|
|
16
|
+
"small-models",
|
|
17
|
+
"huggingface",
|
|
18
|
+
"smollm",
|
|
19
|
+
"recruiter",
|
|
20
|
+
"inbox",
|
|
21
|
+
"triage",
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Development Status :: 3 - Alpha",
|
|
25
|
+
"Intended Audience :: Developers",
|
|
26
|
+
"License :: OSI Approved :: MIT License",
|
|
27
|
+
"Operating System :: OS Independent",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Programming Language :: Python :: 3.12",
|
|
33
|
+
"Programming Language :: Python :: 3.13",
|
|
34
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
35
|
+
]
|
|
36
|
+
# Core lib has NO heavy deps; you bring your own callable LM.
|
|
37
|
+
# The HF model integration lives in the [smollm] extra.
|
|
38
|
+
dependencies = []
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
smollm = ["transformers>=4.44", "torch>=2.2", "accelerate>=0.30"]
|
|
42
|
+
space = ["gradio>=4.40", "transformers>=4.44", "torch>=2.2", "accelerate>=0.30"]
|
|
43
|
+
dev = ["pytest>=8.0", "ruff>=0.4"]
|
|
44
|
+
|
|
45
|
+
[project.urls]
|
|
46
|
+
Homepage = "https://github.com/MukundaKatta/recruitertriage"
|
|
47
|
+
Source = "https://github.com/MukundaKatta/recruitertriage"
|
|
48
|
+
Issues = "https://github.com/MukundaKatta/recruitertriage/issues"
|
|
49
|
+
|
|
50
|
+
[tool.hatch.build.targets.wheel]
|
|
51
|
+
packages = ["src/recruitertriage"]
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""Gradio Space for recruitertriage on HuggingFace.
|
|
2
|
+
|
|
3
|
+
Built for the HuggingFace Build Small Hackathon. Uses
|
|
4
|
+
HuggingFaceTB/SmolLM2-360M-Instruct (under 1B params) as the underlying
|
|
5
|
+
language model. Fits on a free CPU Space.
|
|
6
|
+
|
|
7
|
+
To deploy:
|
|
8
|
+
|
|
9
|
+
1. `huggingface-cli login` (or use `hf` CLI)
|
|
10
|
+
2. Create a new Space (Gradio SDK, CPU basic)
|
|
11
|
+
3. Push this `space/` directory as the Space root.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
import sys
|
|
19
|
+
|
|
20
|
+
# Make the local src/ importable when running the Space directly from
|
|
21
|
+
# this directory (the published Space pins recruitertriage as a normal
|
|
22
|
+
# pip dep via requirements.txt instead).
|
|
23
|
+
_HERE = os.path.dirname(os.path.abspath(__file__))
|
|
24
|
+
_SRC = os.path.normpath(os.path.join(_HERE, "..", "src"))
|
|
25
|
+
if os.path.isdir(_SRC) and _SRC not in sys.path:
|
|
26
|
+
sys.path.insert(0, _SRC)
|
|
27
|
+
|
|
28
|
+
import gradio as gr
|
|
29
|
+
|
|
30
|
+
from recruitertriage import HeuristicLLM, triage
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Lazily build the SmolLM2 model so the Space starts fast and only
|
|
34
|
+
# pays the model-load cost on first request.
|
|
35
|
+
_smollm_cache = {"fn": None}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _get_llm(backend: str):
    """Return the callable language model for the chosen backend name.

    ``"heuristic"`` yields a new ``HeuristicLLM`` on every call. Any other
    value yields the SmolLM2 callable, which is built by ``make_smollm()``
    the first time it is needed and then reused from ``_smollm_cache``.
    """
    if backend == "heuristic":
        return HeuristicLLM()

    cached = _smollm_cache["fn"]
    if cached is not None:
        return cached

    # Deferred import so the heavy model stack loads only on first use.
    from recruitertriage.smollm import make_smollm

    cached = make_smollm()
    _smollm_cache["fn"] = cached
    return cached
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
LABEL_EMOJI = {
|
|
48
|
+
"interview": "yes",
|
|
49
|
+
"needs_info": "ask",
|
|
50
|
+
"reject": "no",
|
|
51
|
+
"spam": "block",
|
|
52
|
+
"unsure": "shrug",
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _do_triage(subject: str, body: str, backend: str):
|
|
57
|
+
if not subject.strip() and not body.strip():
|
|
58
|
+
return "Paste a recruiter email above.", "", ""
|
|
59
|
+
|
|
60
|
+
llm = _get_llm(backend)
|
|
61
|
+
d = triage(subject=subject, body=body, llm=llm)
|
|
62
|
+
badge = f"{LABEL_EMOJI.get(d.label.value, '?')} {d.label.value} ({d.confidence:.0%})"
|
|
63
|
+
detail = f"**Reason:** {d.reason}\n\n**Signals:** {json.dumps(d.signals)}"
|
|
64
|
+
reply = d.suggested_reply or "_(no suggested reply)_"
|
|
65
|
+
return badge, detail, reply
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
SAMPLES = [
|
|
69
|
+
[
|
|
70
|
+
"Staff ML Engineer @ AcmeAI",
|
|
71
|
+
"Hi - we're hiring a Staff ML Engineer to lead agents. "
|
|
72
|
+
"Comp band 250-320k base + equity. Remote PT/PST friendly. "
|
|
73
|
+
"Are you open to a 15-min chat next week?",
|
|
74
|
+
],
|
|
75
|
+
[
|
|
76
|
+
"Quick question",
|
|
77
|
+
"Hey, would you be open to a chat? Send your resume if "
|
|
78
|
+
"interested!",
|
|
79
|
+
],
|
|
80
|
+
[
|
|
81
|
+
"Lead gen partnership",
|
|
82
|
+
"Hi, our agency can send you 50 verified candidate emails per "
|
|
83
|
+
"week. Buy emails, save time. Reply YES for pricing.",
|
|
84
|
+
],
|
|
85
|
+
]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
with gr.Blocks(title="recruitertriage", theme="soft") as demo:
|
|
89
|
+
gr.Markdown(
|
|
90
|
+
"# recruitertriage\n"
|
|
91
|
+
"Triage recruiter outreach with a small (<1B) language model. "
|
|
92
|
+
"Built for the HuggingFace Build Small Hackathon. "
|
|
93
|
+
"Backed by [SmolLM2-360M-Instruct]"
|
|
94
|
+
"(https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct)."
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
with gr.Row():
|
|
98
|
+
with gr.Column():
|
|
99
|
+
subject = gr.Textbox(label="Subject", lines=1)
|
|
100
|
+
body = gr.Textbox(label="Body", lines=10)
|
|
101
|
+
backend = gr.Radio(
|
|
102
|
+
["smollm", "heuristic"],
|
|
103
|
+
value="smollm",
|
|
104
|
+
label="Backend",
|
|
105
|
+
info="smollm = SmolLM2-360M-Instruct. heuristic = zero-dep keyword fallback (instant).",
|
|
106
|
+
)
|
|
107
|
+
run = gr.Button("Triage", variant="primary")
|
|
108
|
+
with gr.Column():
|
|
109
|
+
label = gr.Markdown(label="Decision")
|
|
110
|
+
detail = gr.Markdown(label="Detail")
|
|
111
|
+
reply = gr.Textbox(label="Suggested reply", lines=4)
|
|
112
|
+
|
|
113
|
+
run.click(_do_triage, [subject, body, backend], [label, detail, reply])
|
|
114
|
+
gr.Examples(examples=SAMPLES, inputs=[subject, body])
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
if __name__ == "__main__":
|
|
118
|
+
demo.launch()
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""recruitertriage - triage recruiter outreach with a small (<1B) LM.
|
|
2
|
+
|
|
3
|
+
A tiny, dependency-light core. Bring your own callable language model
|
|
4
|
+
(SmolLM2, Qwen-0.5B, your own fine-tune, or even a heuristic).
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
|
|
8
|
+
from recruitertriage import triage, Decision
|
|
9
|
+
|
|
10
|
+
def my_llm(prompt: str) -> str:
|
|
11
|
+
# call SmolLM2-360M, return raw text
|
|
12
|
+
...
|
|
13
|
+
|
|
14
|
+
result: Decision = triage(
|
|
15
|
+
subject="Senior ML role at Acme",
|
|
16
|
+
body="Hi! Are you open to a quick chat about a Senior MLE role...",
|
|
17
|
+
llm=my_llm,
|
|
18
|
+
)
|
|
19
|
+
print(result.label, result.confidence, result.reason)
|
|
20
|
+
|
|
21
|
+
For the HuggingFace Build Small Hackathon entry, see `space/app.py`
|
|
22
|
+
for a Gradio UI bundled with SmolLM2-360M-Instruct.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from .core import (
|
|
28
|
+
Decision,
|
|
29
|
+
HeuristicLLM,
|
|
30
|
+
Label,
|
|
31
|
+
triage,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
"Decision",
|
|
36
|
+
"HeuristicLLM",
|
|
37
|
+
"Label",
|
|
38
|
+
"triage",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
"""Core triage logic. No heavy deps — bring your own LM."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import Callable, Iterable
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# ---- decision shape -------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Label(str, Enum):
    """Five buckets a recruiter outreach can fall into.

    The class mixes in ``str``, so every member is also a string equal to
    its value, and ``Label("spam")`` looks a member up by that value.

    `interview`  - looks like a strong fit, schedule a call
    `needs_info` - interesting but missing key details (role, comp, location)
    `reject`     - clearly off (wrong stack, mass blast, sketchy)
    `spam`       - not a real role / vendor pitch / sales
    `unsure`     - the model couldn't decide; fall through to human
    """

    interview = "interview"
    needs_info = "needs_info"
    reject = "reject"
    spam = "spam"
    unsure = "unsure"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class Decision:
    """Result of triaging a single recruiter message.

    Instances are mutable; ``triage()`` adds entries to ``signals`` after
    the decision has been built.
    """

    # Bucket the message was assigned to.
    label: Label
    # Model-reported confidence.
    confidence: float  # 0..1
    # Short explanation for the chosen label.
    reason: str
    # Draft reply text, or None when no reply is suggested.
    suggested_reply: str | None = None
    # Free-form diagnostics; each instance gets its own dict.
    signals: dict[str, object] = field(default_factory=dict)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
LLM = Callable[[str], str]
|
|
42
|
+
"""A callable language model: takes a prompt string, returns raw text."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ---- prompt ---------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
_SYSTEM = (
|
|
49
|
+
"You triage recruiter outreach. Pick exactly one label from: "
|
|
50
|
+
"interview, needs_info, reject, spam, unsure. "
|
|
51
|
+
"Return JSON ONLY with keys: label, confidence (0..1), reason (one short line), "
|
|
52
|
+
"suggested_reply (1-3 sentences, plain, no AI phrases). "
|
|
53
|
+
"If the email is vendor sales, pitch, or unrelated to a job role, return spam. "
|
|
54
|
+
"If it lacks role title or comp range AND looks legit, return needs_info. "
|
|
55
|
+
"If the stack/level is clearly wrong, return reject. "
|
|
56
|
+
"If it looks like a real job match, return interview."
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _build_prompt(subject: str, body: str, hints: dict[str, object] | None) -> str:
    """Assemble the full prompt text sent to the language model.

    Layout: the system instructions, the subject line, the body, an
    optional "User hints" bullet list (one ``- key: value`` line per hint,
    omitted when ``hints`` is empty or None), and a trailing ``JSON:`` cue.
    """
    if hints:
        bullet_lines = [f"- {key}: {value}" for key, value in hints.items()]
        hints_block = "\n\nUser hints:\n" + "\n".join(bullet_lines)
    else:
        hints_block = ""

    header = f"{_SYSTEM}\n\nSubject: {subject}\n\n"
    email = f"Body:\n{body}{hints_block}\n\n"
    return header + email + "JSON:"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---- parsing --------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
_JSON_RE = re.compile(r"\{.*?\}", re.DOTALL)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _parse_llm_json(raw: str) -> dict | None:
|
|
81
|
+
"""Pull the first JSON object out of a model's free-form text.
|
|
82
|
+
|
|
83
|
+
Small models love to wrap things in prose, code fences, or extra
|
|
84
|
+
commentary. Find the first {...} block and try to parse it."""
|
|
85
|
+
# try whole-string first (well-behaved model)
|
|
86
|
+
raw = raw.strip()
|
|
87
|
+
try:
|
|
88
|
+
return json.loads(raw)
|
|
89
|
+
except Exception:
|
|
90
|
+
pass
|
|
91
|
+
m = _JSON_RE.search(raw)
|
|
92
|
+
if not m:
|
|
93
|
+
return None
|
|
94
|
+
try:
|
|
95
|
+
return json.loads(m.group(0))
|
|
96
|
+
except Exception:
|
|
97
|
+
return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _coerce_decision(obj: dict | None) -> Decision:
    """Turn a loosely-shaped parsed model reply into a valid ``Decision``.

    Anything that is not a dict yields ``Label.unsure`` with confidence 0.0.
    For a dict, each field is normalised independently:

    * ``label``: stringified, stripped, lower-cased; unknown values become
      ``Label.unsure``.
    * ``confidence``: converted with ``float()`` and clamped to 0..1. A
      missing, non-numeric, overflowing, or NaN value becomes 0.0.
    * ``reason``: stripped text; missing, null, or blank becomes
      ``"no reason"``.
    * ``suggested_reply``: stripped text, or None when missing, null, or
      blank.
    """
    if not isinstance(obj, dict):
        return Decision(Label.unsure, 0.0, "model output unparseable")

    label_raw = str(obj.get("label", "")).strip().lower()
    try:
        label = Label(label_raw)
    except ValueError:
        label = Label.unsure

    try:
        conf = float(obj.get("confidence", 0.0))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: a JSON integer too large to represent as a float.
        conf = 0.0
    if conf != conf:
        # NaN compares unequal to itself. Without this guard the clamp
        # below would turn NaN into 1.0, i.e. full confidence.
        conf = 0.0
    conf = max(0.0, min(1.0, conf))

    reason_raw = obj.get("reason")
    # An explicit JSON null must not surface as the literal text "None".
    reason = ("" if reason_raw is None else str(reason_raw)).strip() or "no reason"

    reply = obj.get("suggested_reply")
    if reply is not None:
        reply = str(reply).strip() or None

    return Decision(label, conf, reason, suggested_reply=reply)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ---- public entry ---------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def triage(
    *,
    subject: str,
    body: str,
    llm: LLM,
    hints: dict[str, object] | None = None,
) -> Decision:
    """Classify a single recruiter message with the supplied model.

    ``llm`` is any callable taking the prompt string and returning raw
    text; small models (SmolLM2, Qwen-0.5B, etc.) are the intended target.
    The returned ``Decision`` carries two extra ``signals`` entries,
    ``prompt_chars`` and ``raw_chars``, holding the lengths of the prompt
    and of the raw model output.
    """
    prompt = _build_prompt(subject=subject, body=body, hints=hints)
    raw = llm(prompt)

    decision = _coerce_decision(_parse_llm_json(raw))
    decision.signals.update(prompt_chars=len(prompt), raw_chars=len(raw))
    return decision
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# ---- offline / heuristic fallback LM --------------------------------------
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
_INTERVIEW_HINTS = (
|
|
151
|
+
"principal",
|
|
152
|
+
"staff",
|
|
153
|
+
"senior",
|
|
154
|
+
"lead",
|
|
155
|
+
"ml engineer",
|
|
156
|
+
"machine learning",
|
|
157
|
+
"ai engineer",
|
|
158
|
+
"applied scientist",
|
|
159
|
+
"research engineer",
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
_REJECT_HINTS = (
|
|
163
|
+
"java",
|
|
164
|
+
".net",
|
|
165
|
+
"salesforce admin",
|
|
166
|
+
"qa manual",
|
|
167
|
+
"support engineer",
|
|
168
|
+
"tier 1 support",
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
_SPAM_HINTS = (
|
|
172
|
+
"purchase order",
|
|
173
|
+
"lead gen",
|
|
174
|
+
"we have leads",
|
|
175
|
+
"guaranteed candidates",
|
|
176
|
+
"buy emails",
|
|
177
|
+
"outsource your hiring",
|
|
178
|
+
"verified bitcoin",
|
|
179
|
+
"crypto investment",
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
_INFO_HINTS = (
|
|
183
|
+
"more info",
|
|
184
|
+
"let me know if interested",
|
|
185
|
+
"send your resume",
|
|
186
|
+
"open to a chat",
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class HeuristicLLM:
|
|
191
|
+
"""Zero-dep fallback 'model'. Pattern-matches on keywords and emits
|
|
192
|
+
a JSON-shaped string so the core pipeline still works without
|
|
193
|
+
transformers/torch installed. Good for tests, CI, and offline demos.
|
|
194
|
+
|
|
195
|
+
Don't ship this to prod. Plug in SmolLM2 or similar for real runs."""
|
|
196
|
+
|
|
197
|
+
def __call__(self, prompt: str) -> str:
|
|
198
|
+
body = prompt.lower()
|
|
199
|
+
|
|
200
|
+
if any(s in body for s in _SPAM_HINTS):
|
|
201
|
+
return json.dumps({
|
|
202
|
+
"label": "spam",
|
|
203
|
+
"confidence": 0.85,
|
|
204
|
+
"reason": "matched spam keyword",
|
|
205
|
+
"suggested_reply": None,
|
|
206
|
+
})
|
|
207
|
+
if any(s in body for s in _REJECT_HINTS):
|
|
208
|
+
return json.dumps({
|
|
209
|
+
"label": "reject",
|
|
210
|
+
"confidence": 0.78,
|
|
211
|
+
"reason": "stack/level mismatch",
|
|
212
|
+
"suggested_reply": (
|
|
213
|
+
"Thanks for reaching out. Not the right fit for me right now."
|
|
214
|
+
),
|
|
215
|
+
})
|
|
216
|
+
if any(s in body for s in _INTERVIEW_HINTS):
|
|
217
|
+
return json.dumps({
|
|
218
|
+
"label": "interview",
|
|
219
|
+
"confidence": 0.72,
|
|
220
|
+
"reason": "title/level looks like a fit",
|
|
221
|
+
"suggested_reply": (
|
|
222
|
+
"Thanks, this looks interesting. Could you share the role "
|
|
223
|
+
"level, comp band, and location?"
|
|
224
|
+
),
|
|
225
|
+
})
|
|
226
|
+
if any(s in body for s in _INFO_HINTS):
|
|
227
|
+
return json.dumps({
|
|
228
|
+
"label": "needs_info",
|
|
229
|
+
"confidence": 0.6,
|
|
230
|
+
"reason": "no role title or details given",
|
|
231
|
+
"suggested_reply": (
|
|
232
|
+
"Happy to chat. Could you share the company, role title, "
|
|
233
|
+
"and comp band first?"
|
|
234
|
+
),
|
|
235
|
+
})
|
|
236
|
+
return json.dumps({
|
|
237
|
+
"label": "unsure",
|
|
238
|
+
"confidence": 0.3,
|
|
239
|
+
"reason": "no clear signals",
|
|
240
|
+
})
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# ---- batch helpers --------------------------------------------------------
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def triage_batch(
    emails: Iterable[tuple[str, str]],
    *,
    llm: LLM,
    hints: dict[str, object] | None = None,
) -> list[Decision]:
    """Run ``triage`` over each ``(subject, body)`` pair, in order.

    The same ``llm`` and ``hints`` are used for every email. Returns one
    ``Decision`` per input pair; an empty input gives an empty list.
    """
    decisions: list[Decision] = []
    for subject, body in emails:
        decisions.append(triage(subject=subject, body=body, llm=llm, hints=hints))
    return decisions
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""SmolLM2 (HuggingFaceTB/SmolLM2-360M-Instruct) integration.
|
|
2
|
+
|
|
3
|
+
Lives behind an extra: `pip install "recruitertriage[smollm]"`.
|
|
4
|
+
|
|
5
|
+
The default model is small enough to run on a laptop CPU; for the
|
|
6
|
+
HuggingFace Build Small Hackathon we explicitly target sub-1B
|
|
7
|
+
parameter models. Pass any other instruction-tuned causal LM by name."""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def make_smollm(
    model_id: str = "HuggingFaceTB/SmolLM2-360M-Instruct",
    *,
    device: str | None = None,
    max_new_tokens: int = 220,
    temperature: float = 0.2,
):
    """Build a callable LM around a HuggingFace causal model.

    Args:
        model_id: Model identifier passed to ``from_pretrained`` for both
            the tokenizer and the model.
        device: Torch device string. When ``None``, ``"cuda"`` is chosen if
            available, then ``"mps"``, otherwise ``"cpu"``.
        max_new_tokens: Upper bound on the tokens generated per call.
        temperature: Sampling temperature. Values ``<= 0`` disable sampling
            and fall back to greedy decoding.

    Returns:
        A function `(prompt: str) -> str` that you can pass straight
        into `recruitertriage.triage(..., llm=...)`.

    Raises:
        RuntimeError: If ``torch`` or ``transformers`` cannot be imported.
    """
    try:
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
    except Exception as e:  # pragma: no cover - import guard
        raise RuntimeError(
            'recruitertriage[smollm] extras are required for SmolLM2. '
            'Install with: pip install "recruitertriage[smollm]"'
        ) from e

    if device is None:
        if torch.cuda.is_available():
            device = "cuda"
        elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
            device = "mps"
        else:
            device = "cpu"

    tok = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
    model.eval()

    # Only forward sampling knobs when we actually sample: passing
    # `temperature` together with `do_sample=False` is flagged by
    # transformers as an inconsistent generation config.
    do_sample = temperature > 0
    generate_kwargs: dict[str, Any] = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "pad_token_id": tok.eos_token_id,
    }
    if do_sample:
        generate_kwargs["temperature"] = temperature

    def _call(prompt: str) -> str:
        # SmolLM2-Instruct expects a chat template; the prompt becomes
        # a single user message. We deliberately keep the system message
        # baked into `prompt` so callers can swap models without breaking
        # the template.
        messages = [{"role": "user", "content": prompt}]
        rendered = tok.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # The rendered template already carries the model's special tokens,
        # so the tokenizer must not add them again (avoids a duplicate BOS
        # on tokenizers that prepend one by default).
        inputs = tok(
            rendered, return_tensors="pt", add_special_tokens=False
        ).to(device)
        with torch_no_grad():
            out = model.generate(**inputs, **generate_kwargs)
        # only decode the newly-generated tokens
        prompt_len = inputs["input_ids"].shape[1]
        return tok.decode(out[0, prompt_len:], skip_special_tokens=True).strip()

    return _call
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def torch_no_grad():
    """Return a fresh ``torch.no_grad()`` context manager.

    torch is imported inside the function rather than at module level, so
    importing this module does not itself require torch to be installed.
    """
    from torch import no_grad  # noqa: PLC0415

    return no_grad()
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Tests for recruitertriage core triage logic.
|
|
2
|
+
|
|
3
|
+
These tests don't need transformers/torch — they exercise the
|
|
4
|
+
heuristic LM and the JSON parser directly."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
from recruitertriage import Decision, HeuristicLLM, Label, triage
|
|
13
|
+
from recruitertriage.core import _coerce_decision, _parse_llm_json, triage_batch
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ---- _parse_llm_json ------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_parse_clean_json():
    """A bare JSON object string is parsed into the equivalent dict."""
    expected = {"label": "interview", "confidence": 0.8}
    parsed = _parse_llm_json('{"label":"interview","confidence":0.8}')
    assert parsed == expected
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_parse_json_buried_in_prose():
    """A JSON object wrapped in chatty text and a code fence is still found."""
    reply = (
        'Sure! Here is the result:\n'
        '```json\n'
        '{"label":"reject"}\n'
        '```\n'
        'Hope that helps!'
    )
    parsed = _parse_llm_json(reply)
    assert parsed == {"label": "reject"}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_parse_garbage_returns_none():
    """Text with no JSON in it yields ``None``."""
    parsed = _parse_llm_json("totally not json")
    assert parsed is None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ---- _coerce_decision -----------------------------------------------------
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_coerce_unknown_label_falls_back_to_unsure():
    """An unrecognised label string is coerced to ``Label.unsure``."""
    payload = {"label": "yolo", "confidence": 0.9, "reason": "x"}
    decision = _coerce_decision(payload)
    assert decision.label is Label.unsure
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_coerce_clamps_confidence():
    """Out-of-range confidences are clamped to 1.0 (high) and 0.0 (low)."""
    # (raw confidence, expected clamped value); too-high case is checked first.
    for raw, clamped in ((5.0, 1.0), (-1.0, 0.0)):
        decision = _coerce_decision(
            {"label": "interview", "confidence": raw, "reason": "x"}
        )
        assert decision.confidence == clamped
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_coerce_none_returns_unsure():
    """A ``None`` payload becomes an ``unsure`` decision with zero confidence."""
    decision = _coerce_decision(None)
    assert decision.label is Label.unsure
    assert decision.confidence == 0.0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ---- HeuristicLLM end-to-end ---------------------------------------------
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@pytest.mark.parametrize(
    ("subject", "body", "expected"),
    [
        # spam
        (
            "We have leads",
            "We sell verified bitcoin investment opportunities to your team.",
            Label.spam,
        ),
        # reject (stack mismatch)
        (
            "Java backend role at BankCorp",
            "Hi, we're hiring a Java developer for our trading platform.",
            Label.reject,
        ),
        # interview (clear fit)
        (
            "Staff ML Engineer at AcmeAI",
            "Looking for a Staff ML Engineer to lead our agents team. "
            "Comp 250-320k base + equity.",
            Label.interview,
        ),
        # needs_info (vague but legit)
        (
            "Quick question",
            "Hey, would you be open to a chat? Send your resume "
            "if interested!",
            Label.needs_info,
        ),
    ],
)
def test_heuristic_llm_round_trip(subject: str, body: str, expected: Label):
    """Each sample email, triaged through HeuristicLLM, gets its expected label."""
    decision = triage(subject=subject, body=body, llm=HeuristicLLM())
    assert decision.label is expected
    # The confidence must be a valid probability and a reason must be given.
    assert 0.0 <= decision.confidence <= 1.0
    assert decision.reason
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_unsure_when_no_signals():
    """An email with no recognisable hints is labelled ``unsure``."""
    heuristic = HeuristicLLM()
    decision = triage(subject="Hello", body="Hi, just saying hi.", llm=heuristic)
    assert decision.label is Label.unsure
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ---- BYO-LLM contract -----------------------------------------------------
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_byo_llm_callable_works():
    """Any plain ``(prompt) -> str`` callable can serve as the LLM."""
    canned = {
        "label": "interview",
        "confidence": 0.91,
        "reason": "test fixture",
        "suggested_reply": "thanks, send details",
    }

    def fake_llm(prompt: str) -> str:
        # Ignores the prompt and always answers with the canned JSON.
        return json.dumps(canned)

    decision = triage(subject="hi", body="hi", llm=fake_llm)
    assert decision.label is Label.interview
    assert decision.confidence == pytest.approx(0.91)
    assert decision.suggested_reply == "thanks, send details"
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def test_signals_captured():
    """The decision's ``signals`` mapping carries prompt and raw-output sizes."""
    signals = triage(subject="x", body="y", llm=HeuristicLLM()).signals
    for key in ("prompt_chars", "raw_chars"):
        assert key in signals
    assert isinstance(signals["prompt_chars"], int)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# ---- batch ---------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_triage_batch():
    """``triage_batch`` returns one decision per input pair, in input order."""
    emails = [
        ("Staff MLE role", "Senior Machine Learning Engineer at AcmeAI"),
        ("Java dev", "We need a Java backend dev"),
    ]
    decisions = triage_batch(emails, llm=HeuristicLLM())
    assert len(decisions) == 2
    first, second = decisions
    assert first.label is Label.interview
    assert second.label is Label.reject
|