recruitertriage 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ .venv/
7
+ dist/
8
+ build/
9
+ *.egg-info/
10
+ .DS_Store
11
+ .gradio/
12
+ flagged/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mukunda Rao Katta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,133 @@
1
+ Metadata-Version: 2.4
2
+ Name: recruitertriage
3
+ Version: 0.1.0
4
+ Summary: Triage recruiter outreach with a small (<1B) language model. Built for the HuggingFace Build Small Hackathon.
5
+ Project-URL: Homepage, https://github.com/MukundaKatta/recruitertriage
6
+ Project-URL: Source, https://github.com/MukundaKatta/recruitertriage
7
+ Project-URL: Issues, https://github.com/MukundaKatta/recruitertriage/issues
8
+ Author-email: Mukunda Rao Katta <mukunda.vjcs6@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: ai,huggingface,inbox,llm,recruiter,small-models,smollm,triage
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Requires-Python: >=3.10
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=8.0; extra == 'dev'
26
+ Requires-Dist: ruff>=0.4; extra == 'dev'
27
+ Provides-Extra: smollm
28
+ Requires-Dist: accelerate>=0.30; extra == 'smollm'
29
+ Requires-Dist: torch>=2.2; extra == 'smollm'
30
+ Requires-Dist: transformers>=4.44; extra == 'smollm'
31
+ Provides-Extra: space
32
+ Requires-Dist: accelerate>=0.30; extra == 'space'
33
+ Requires-Dist: gradio>=4.40; extra == 'space'
34
+ Requires-Dist: torch>=2.2; extra == 'space'
35
+ Requires-Dist: transformers>=4.44; extra == 'space'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # recruitertriage
39
+
40
+ Triage recruiter outreach with a small (<1B) language model. Built
41
+ for the [HuggingFace Build Small Hackathon][bs].
42
+
43
+ [bs]: https://huggingface.co/Build-Small-Hackathon
44
+
45
+ The default backend is
46
+ [SmolLM2-360M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct).
47
+ It fits on a free HF Space CPU and still gets useful triage signal.
48
+ You can swap in any callable LM (a fine-tune, Qwen-0.5B, a
49
+ heuristic, anything that maps `prompt -> string`).
50
+
51
+ ## Install
52
+
53
+ ```bash
54
+ pip install recruitertriage # core only (zero heavy deps)
55
+ pip install "recruitertriage[smollm]" # + SmolLM2 / transformers
56
+ pip install "recruitertriage[space]" # + Gradio for the Space UI
57
+ ```
58
+
59
+ Python 3.10+.
60
+
61
+ ## What it does
62
+
63
+ Each piece of recruiter outreach gets bucketed into one of five labels:
64
+
65
+ | label | meaning |
66
+ |--------------|------------------------------------------------------|
67
+ | `interview` | looks like a real fit, schedule a call |
68
+ | `needs_info` | interesting but missing role/comp/level |
69
+ | `reject` | clearly off (wrong stack, wrong level) |
70
+ | `spam` | not a real role (vendor sales, lead-gen, scam) |
71
+ | `unsure` | low confidence — falls through to a human |
72
+
73
+ Each `Decision` carries a label, a confidence (0..1), a one-line
74
+ reason, an optional suggested reply, and a `signals` dict of
75
+ diagnostic metadata (prompt and raw-output character counts).
76
+
77
+ ## Usage
78
+
79
+ ```python
80
+ from recruitertriage import triage, HeuristicLLM
81
+
82
+ # Zero-dep fallback (good for tests/CI):
83
+ d = triage(
84
+ subject="Staff ML Engineer @ AcmeAI",
85
+ body="Hi - we're hiring a Staff ML Engineer. Comp 250-320k...",
86
+ llm=HeuristicLLM(),
87
+ )
88
+ print(d.label, d.confidence, d.suggested_reply)
89
+
90
+ # Real small-model backend (requires the `smollm` extra):
91
+ from recruitertriage.smollm import make_smollm
92
+ llm = make_smollm("HuggingFaceTB/SmolLM2-360M-Instruct")
93
+ d = triage(subject="...", body="...", llm=llm)
94
+ ```
95
+
96
+ `triage()` only cares that `llm` is `Callable[[str], str]`, so any
97
+ small instruction-tuned LM works. The core parser is tolerant of
98
+ small-model JSON quirks (code fences, prose around the object, etc.).
99
+
100
+ ## HuggingFace Space
101
+
102
+ The `space/` directory is the deployable Gradio app:
103
+
104
+ ```bash
105
+ pip install "recruitertriage[space]"
106
+ python space/app.py
107
+ ```
108
+
109
+ To publish:
110
+
111
+ 1. `hf login`
112
+ 2. Create a new Gradio Space (CPU basic is enough)
113
+ 3. Push the contents of `space/` as the Space root
114
+
115
+ ## Demo
116
+
117
+ ```bash
118
+ python examples/demo.py # uses HeuristicLLM
119
+ python examples/demo.py --smollm # uses SmolLM2-360M-Instruct
120
+ ```
121
+
122
+ ## Companion libraries
123
+
124
+ `recruitertriage` slots into the @mukundakatta agent-stack:
125
+
126
+ - [agentleash](https://github.com/MukundaKatta/agentleash) — USD/call budget cap + tool-arg gate
127
+ - [birddog](https://github.com/MukundaKatta/birddog) — audited Bright Data egress for scraping agents
128
+ - [agentvet](https://github.com/MukundaKatta/agentvet) — tool-arg validation with retry hints
129
+ - [agentsnap](https://github.com/MukundaKatta/agentsnap) — snapshot tests for agent traces
130
+
131
+ ## License
132
+
133
+ MIT
@@ -0,0 +1,96 @@
1
+ # recruitertriage
2
+
3
+ Triage recruiter outreach with a small (<1B) language model. Built
4
+ for the [HuggingFace Build Small Hackathon][bs].
5
+
6
+ [bs]: https://huggingface.co/Build-Small-Hackathon
7
+
8
+ The default backend is
9
+ [SmolLM2-360M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct).
10
+ It fits on a free HF Space CPU and still gets useful triage signal.
11
+ You can swap in any callable LM (a fine-tune, Qwen-0.5B, a
12
+ heuristic, anything that maps `prompt -> string`).
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pip install recruitertriage # core only (zero heavy deps)
18
+ pip install "recruitertriage[smollm]" # + SmolLM2 / transformers
19
+ pip install "recruitertriage[space]" # + Gradio for the Space UI
20
+ ```
21
+
22
+ Python 3.10+.
23
+
24
+ ## What it does
25
+
26
+ Each piece of recruiter outreach gets bucketed into one of five labels:
27
+
28
+ | label | meaning |
29
+ |--------------|------------------------------------------------------|
30
+ | `interview` | looks like a real fit, schedule a call |
31
+ | `needs_info` | interesting but missing role/comp/level |
32
+ | `reject` | clearly off (wrong stack, wrong level) |
33
+ | `spam` | not a real role (vendor sales, lead-gen, scam) |
34
+ | `unsure` | low confidence — falls through to a human |
35
+
36
+ Each `Decision` carries a label, a confidence (0..1), a one-line
37
+ reason, an optional suggested reply, and a `signals` dict of
38
+ diagnostic metadata (prompt and raw-output character counts).
39
+
40
+ ## Usage
41
+
42
+ ```python
43
+ from recruitertriage import triage, HeuristicLLM
44
+
45
+ # Zero-dep fallback (good for tests/CI):
46
+ d = triage(
47
+ subject="Staff ML Engineer @ AcmeAI",
48
+ body="Hi - we're hiring a Staff ML Engineer. Comp 250-320k...",
49
+ llm=HeuristicLLM(),
50
+ )
51
+ print(d.label, d.confidence, d.suggested_reply)
52
+
53
+ # Real small-model backend (requires the `smollm` extra):
54
+ from recruitertriage.smollm import make_smollm
55
+ llm = make_smollm("HuggingFaceTB/SmolLM2-360M-Instruct")
56
+ d = triage(subject="...", body="...", llm=llm)
57
+ ```
58
+
59
+ `triage()` only cares that `llm` is `Callable[[str], str]`, so any
60
+ small instruction-tuned LM works. The core parser is tolerant of
61
+ small-model JSON quirks (code fences, prose around the object, etc.).
62
+
63
+ ## HuggingFace Space
64
+
65
+ The `space/` directory is the deployable Gradio app:
66
+
67
+ ```bash
68
+ pip install "recruitertriage[space]"
69
+ python space/app.py
70
+ ```
71
+
72
+ To publish:
73
+
74
+ 1. `hf login`
75
+ 2. Create a new Gradio Space (CPU basic is enough)
76
+ 3. Push the contents of `space/` as the Space root
77
+
78
+ ## Demo
79
+
80
+ ```bash
81
+ python examples/demo.py # uses HeuristicLLM
82
+ python examples/demo.py --smollm # uses SmolLM2-360M-Instruct
83
+ ```
84
+
85
+ ## Companion libraries
86
+
87
+ `recruitertriage` slots into the @mukundakatta agent-stack:
88
+
89
+ - [agentleash](https://github.com/MukundaKatta/agentleash) — USD/call budget cap + tool-arg gate
90
+ - [birddog](https://github.com/MukundaKatta/birddog) — audited Bright Data egress for scraping agents
91
+ - [agentvet](https://github.com/MukundaKatta/agentvet) — tool-arg validation with retry hints
92
+ - [agentsnap](https://github.com/MukundaKatta/agentsnap) — snapshot tests for agent traces
93
+
94
+ ## License
95
+
96
+ MIT
@@ -0,0 +1,72 @@
1
+ """Run recruitertriage against a handful of canned emails.
2
+
3
+ By default uses the zero-dep HeuristicLLM so this script works in CI
4
+ and on machines without transformers/torch installed.
5
+
6
+ Pass --smollm to use SmolLM2-360M-Instruct (requires
7
+ `pip install "recruitertriage[smollm]"`).
8
+
9
+ python examples/demo.py
10
+ python examples/demo.py --smollm
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import json
17
+
18
+ from recruitertriage import HeuristicLLM, triage
19
+
20
+
21
+ SAMPLES = [
22
+ (
23
+ "Staff ML Engineer @ AcmeAI",
24
+ "Hi Mukunda - we're hiring a Staff ML Engineer to lead agents. "
25
+ "Comp band 250-320k base + equity. Remote PT/PST friendly. "
26
+ "Are you open to a 15-min chat next week?",
27
+ ),
28
+ (
29
+ "Java backend role",
30
+ "Hello, we are sourcing for a Java backend developer at a "
31
+ "Fortune 100 bank. Onsite Charlotte. Interested?",
32
+ ),
33
+ (
34
+ "Lead gen partnership",
35
+ "Hi, our agency can send you 50 verified candidate emails per week. "
36
+ "Buy emails, save time. Reply YES for pricing.",
37
+ ),
38
+ (
39
+ "Quick chat",
40
+ "Hey - saw your GitHub. Could you send your resume if you're "
41
+ "open to a chat? Let me know if interested!",
42
+ ),
43
+ ]
44
+
45
+
46
def main() -> None:
    """Triage every canned sample and print one summary per email.

    The backend is the zero-dep HeuristicLLM unless ``--smollm`` is passed,
    in which case the SmolLM2 integration is imported lazily and used.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smollm",
        action="store_true",
        help="Use SmolLM2-360M-Instruct (heavy import).",
    )
    options = parser.parse_args()

    if not options.smollm:
        backend = "HeuristicLLM (zero-dep fallback)"
        llm = HeuristicLLM()
    else:
        # Imported here so the default path never pays for torch/transformers.
        from recruitertriage.smollm import make_smollm

        backend = "SmolLM2-360M-Instruct"
        llm = make_smollm()

    print(f"backend: {backend}\n")

    index = 0
    for subj, body in SAMPLES:
        index += 1
        d = triage(subject=subj, body=body, llm=llm)
        print(f"[{index}] {subj}")
        print(f" label={d.label.value:<11} conf={d.confidence:.2f} reason={d.reason}")
        if d.suggested_reply:
            print(f" reply: {d.suggested_reply}")
        print()
69
+
70
+
71
+ if __name__ == "__main__":
72
+ main()
@@ -0,0 +1,51 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.24"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "recruitertriage"
7
+ version = "0.1.0"
8
+ description = "Triage recruiter outreach with a small (<1B) language model. Built for the HuggingFace Build Small Hackathon."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Mukunda Rao Katta", email = "mukunda.vjcs6@gmail.com" }]
13
+ keywords = [
14
+ "ai",
15
+ "llm",
16
+ "small-models",
17
+ "huggingface",
18
+ "smollm",
19
+ "recruiter",
20
+ "inbox",
21
+ "triage",
22
+ ]
23
+ classifiers = [
24
+ "Development Status :: 3 - Alpha",
25
+ "Intended Audience :: Developers",
26
+ "License :: OSI Approved :: MIT License",
27
+ "Operating System :: OS Independent",
28
+ "Programming Language :: Python :: 3",
29
+ "Programming Language :: Python :: 3 :: Only",
30
+ "Programming Language :: Python :: 3.10",
31
+ "Programming Language :: Python :: 3.11",
32
+ "Programming Language :: Python :: 3.12",
33
+ "Programming Language :: Python :: 3.13",
34
+ "Topic :: Software Development :: Libraries :: Python Modules",
35
+ ]
36
+ # Core lib has NO heavy deps; you bring your own callable LM.
37
+ # The HF model integration lives in the [smollm] extra.
38
+ dependencies = []
39
+
40
+ [project.optional-dependencies]
41
+ smollm = ["transformers>=4.44", "torch>=2.2", "accelerate>=0.30"]
42
+ space = ["gradio>=4.40", "transformers>=4.44", "torch>=2.2", "accelerate>=0.30"]
43
+ dev = ["pytest>=8.0", "ruff>=0.4"]
44
+
45
+ [project.urls]
46
+ Homepage = "https://github.com/MukundaKatta/recruitertriage"
47
+ Source = "https://github.com/MukundaKatta/recruitertriage"
48
+ Issues = "https://github.com/MukundaKatta/recruitertriage/issues"
49
+
50
+ [tool.hatch.build.targets.wheel]
51
+ packages = ["src/recruitertriage"]
@@ -0,0 +1,118 @@
1
+ """Gradio Space for recruitertriage on HuggingFace.
2
+
3
+ Built for the HuggingFace Build Small Hackathon. Uses
4
+ HuggingFaceTB/SmolLM2-360M-Instruct (under 1B params) as the underlying
5
+ language model. Fits on a free CPU Space.
6
+
7
+ To deploy:
8
+
9
+ 1. `huggingface-cli login` (or use `hf` CLI)
10
+ 2. Create a new Space (Gradio SDK, CPU basic)
11
+ 3. Push this `space/` directory as the Space root.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import os
18
+ import sys
19
+
20
+ # Make the local src/ importable when running the Space directly from
21
+ # this directory (the published Space pins recruitertriage as a normal
22
+ # pip dep via requirements.txt instead).
23
+ _HERE = os.path.dirname(os.path.abspath(__file__))
24
+ _SRC = os.path.normpath(os.path.join(_HERE, "..", "src"))
25
+ if os.path.isdir(_SRC) and _SRC not in sys.path:
26
+ sys.path.insert(0, _SRC)
27
+
28
+ import gradio as gr
29
+
30
+ from recruitertriage import HeuristicLLM, triage
31
+
32
+
33
+ # Lazily build the SmolLM2 model so the Space starts fast and only
34
+ # pays the model-load cost on first request.
35
+ _smollm_cache = {"fn": None}
36
+
37
+
38
def _get_llm(backend: str):
    """Return the callable LM for the selected backend.

    ``"heuristic"`` yields a fresh HeuristicLLM. Any other value yields the
    SmolLM2 callable, which is built on first use and then reused from
    ``_smollm_cache``.
    """
    if backend != "heuristic":
        cached = _smollm_cache["fn"]
        if cached is None:
            # Deferred import: only pay for torch/transformers on first use.
            from recruitertriage.smollm import make_smollm

            cached = make_smollm()
            _smollm_cache["fn"] = cached
        return cached
    return HeuristicLLM()
45
+
46
+
47
+ LABEL_EMOJI = {
48
+ "interview": "yes",
49
+ "needs_info": "ask",
50
+ "reject": "no",
51
+ "spam": "block",
52
+ "unsure": "shrug",
53
+ }
54
+
55
+
56
def _do_triage(subject: str, body: str, backend: str):
    """Gradio click handler: triage one email and format the three outputs.

    Returns a ``(badge, detail, reply)`` tuple of strings. When both the
    subject and the body are blank, a prompt-to-paste message is returned and
    no model is invoked.
    """
    has_subject = bool(subject.strip())
    has_body = bool(body.strip())
    if not (has_subject or has_body):
        return "Paste a recruiter email above.", "", ""

    d = triage(subject=subject, body=body, llm=_get_llm(backend))

    label_text = d.label.value
    marker = LABEL_EMOJI.get(label_text, "?")
    badge = f"{marker} {label_text} ({d.confidence:.0%})"

    signals_json = json.dumps(d.signals)
    detail = f"**Reason:** {d.reason}\n\n**Signals:** {signals_json}"

    reply = d.suggested_reply if d.suggested_reply else "_(no suggested reply)_"
    return badge, detail, reply
66
+
67
+
68
+ SAMPLES = [
69
+ [
70
+ "Staff ML Engineer @ AcmeAI",
71
+ "Hi - we're hiring a Staff ML Engineer to lead agents. "
72
+ "Comp band 250-320k base + equity. Remote PT/PST friendly. "
73
+ "Are you open to a 15-min chat next week?",
74
+ ],
75
+ [
76
+ "Quick question",
77
+ "Hey, would you be open to a chat? Send your resume if "
78
+ "interested!",
79
+ ],
80
+ [
81
+ "Lead gen partnership",
82
+ "Hi, our agency can send you 50 verified candidate emails per "
83
+ "week. Buy emails, save time. Reply YES for pricing.",
84
+ ],
85
+ ]
86
+
87
+
88
+ with gr.Blocks(title="recruitertriage", theme="soft") as demo:
89
+ gr.Markdown(
90
+ "# recruitertriage\n"
91
+ "Triage recruiter outreach with a small (<1B) language model. "
92
+ "Built for the HuggingFace Build Small Hackathon. "
93
+ "Backed by [SmolLM2-360M-Instruct]"
94
+ "(https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct)."
95
+ )
96
+
97
+ with gr.Row():
98
+ with gr.Column():
99
+ subject = gr.Textbox(label="Subject", lines=1)
100
+ body = gr.Textbox(label="Body", lines=10)
101
+ backend = gr.Radio(
102
+ ["smollm", "heuristic"],
103
+ value="smollm",
104
+ label="Backend",
105
+ info="smollm = SmolLM2-360M-Instruct. heuristic = zero-dep keyword fallback (instant).",
106
+ )
107
+ run = gr.Button("Triage", variant="primary")
108
+ with gr.Column():
109
+ label = gr.Markdown(label="Decision")
110
+ detail = gr.Markdown(label="Detail")
111
+ reply = gr.Textbox(label="Suggested reply", lines=4)
112
+
113
+ run.click(_do_triage, [subject, body, backend], [label, detail, reply])
114
+ gr.Examples(examples=SAMPLES, inputs=[subject, body])
115
+
116
+
117
+ if __name__ == "__main__":
118
+ demo.launch()
@@ -0,0 +1,5 @@
1
+ recruitertriage>=0.1.0
2
+ gradio>=4.40
3
+ transformers>=4.44
4
+ torch>=2.2
5
+ accelerate>=0.30
@@ -0,0 +1,41 @@
1
+ """recruitertriage - triage recruiter outreach with a small (<1B) LM.
2
+
3
+ A tiny, dependency-light core. Bring your own callable language model
4
+ (SmolLM2, Qwen-0.5B, your own fine-tune, or even a heuristic).
5
+
6
+ Example:
7
+
8
+ from recruitertriage import triage, Decision
9
+
10
+ def my_llm(prompt: str) -> str:
11
+ # call SmolLM2-360M, return raw text
12
+ ...
13
+
14
+ result: Decision = triage(
15
+ subject="Senior ML role at Acme",
16
+ body="Hi! Are you open to a quick chat about a Senior MLE role...",
17
+ llm=my_llm,
18
+ )
19
+ print(result.label, result.confidence, result.reason)
20
+
21
+ For the HuggingFace Build Small Hackathon entry, see `space/app.py`
22
+ for a Gradio UI bundled with SmolLM2-360M-Instruct.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ from .core import (
28
+ Decision,
29
+ HeuristicLLM,
30
+ Label,
31
+ triage,
32
+ )
33
+
34
+ __all__ = [
35
+ "Decision",
36
+ "HeuristicLLM",
37
+ "Label",
38
+ "triage",
39
+ ]
40
+
41
+ __version__ = "0.1.0"
@@ -0,0 +1,253 @@
1
+ """Core triage logic. No heavy deps — bring your own LM."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import dataclass, field
8
+ from enum import Enum
9
+ from typing import Callable, Iterable
10
+
11
+
12
+ # ---- decision shape -------------------------------------------------------
13
+
14
+
15
class Label(str, Enum):
    """Closed set of outcomes a triaged recruiter message can receive.

    Members are also plain strings, so they compare equal to their values.

    interview  -- looks like a strong fit, schedule a call
    needs_info -- interesting but missing key details (role, comp, location)
    reject     -- clearly off (wrong stack, mass blast, sketchy)
    spam       -- not a real role / vendor pitch / sales
    unsure     -- the model couldn't decide; fall through to a human
    """

    interview = "interview"
    needs_info = "needs_info"
    reject = "reject"
    spam = "spam"
    unsure = "unsure"
30
+
31
+
32
@dataclass
class Decision:
    """Outcome of triaging a single message."""

    # Bucket the message was assigned to.
    label: Label
    # Confidence in the label, expected in the range 0..1.
    confidence: float
    # Short explanation for the label.
    reason: str
    # Draft reply text, when one was produced.
    suggested_reply: str | None = None
    # Free-form metadata attached to the decision; a fresh dict per instance.
    signals: dict[str, object] = field(default_factory=dict)
39
+
40
+
41
+ LLM = Callable[[str], str]
42
+ """A callable language model: takes a prompt string, returns raw text."""
43
+
44
+
45
+ # ---- prompt ---------------------------------------------------------------
46
+
47
+
48
+ _SYSTEM = (
49
+ "You triage recruiter outreach. Pick exactly one label from: "
50
+ "interview, needs_info, reject, spam, unsure. "
51
+ "Return JSON ONLY with keys: label, confidence (0..1), reason (one short line), "
52
+ "suggested_reply (1-3 sentences, plain, no AI phrases). "
53
+ "If the email is vendor sales, pitch, or unrelated to a job role, return spam. "
54
+ "If it lacks role title or comp range AND looks legit, return needs_info. "
55
+ "If the stack/level is clearly wrong, return reject. "
56
+ "If it looks like a real job match, return interview."
57
+ )
58
+
59
+
60
def _build_prompt(subject: str, body: str, hints: dict[str, object] | None) -> str:
    """Assemble the full prompt text sent to the language model.

    The prompt is the system instructions, the subject, the body (followed by
    an optional bulleted "User hints" section), and a trailing ``JSON:`` cue,
    separated by blank lines.
    """
    body_section = f"Body:\n{body}"
    if hints:
        bullet_lines = [f"- {key}: {value}" for key, value in hints.items()]
        body_section += "\n\nUser hints:\n" + "\n".join(bullet_lines)

    sections = (_SYSTEM, f"Subject: {subject}", body_section, "JSON:")
    return "\n\n".join(sections)
72
+
73
+
74
+ # ---- parsing --------------------------------------------------------------
75
+
76
+
77
+ _JSON_RE = re.compile(r"\{.*?\}", re.DOTALL)
78
+
79
+
80
+ def _parse_llm_json(raw: str) -> dict | None:
81
+ """Pull the first JSON object out of a model's free-form text.
82
+
83
+ Small models love to wrap things in prose, code fences, or extra
84
+ commentary. Find the first {...} block and try to parse it."""
85
+ # try whole-string first (well-behaved model)
86
+ raw = raw.strip()
87
+ try:
88
+ return json.loads(raw)
89
+ except Exception:
90
+ pass
91
+ m = _JSON_RE.search(raw)
92
+ if not m:
93
+ return None
94
+ try:
95
+ return json.loads(m.group(0))
96
+ except Exception:
97
+ return None
98
+
99
+
100
def _coerce_decision(obj: dict | None) -> Decision:
    """Turn a loosely-shaped parsed model output into a valid Decision.

    Args:
        obj: The parsed JSON object, or ``None`` (or any non-dict) when
            parsing failed.

    Returns:
        A Decision. Non-dict input yields ``unsure`` with confidence 0.0.
        Unknown labels fall back to ``unsure``. Confidence that is missing,
        non-numeric, NaN, or too large to convert becomes 0.0; anything else
        is clamped to 0..1. A missing, null, or blank reason becomes
        "no reason". A null or blank suggested reply becomes ``None``.
    """
    import math  # local import: the block needs nothing else from math

    if not isinstance(obj, dict):
        return Decision(Label.unsure, 0.0, "model output unparseable")

    label_raw = str(obj.get("label", "")).strip().lower()
    try:
        label = Label(label_raw)
    except ValueError:
        label = Label.unsure

    try:
        conf = float(obj.get("confidence", 0.0))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an integer too large for a double.
        conf = 0.0
    if math.isnan(conf):
        # min(1.0, nan) evaluates to 1.0, so an unchecked NaN would be
        # reported as full confidence.
        conf = 0.0
    conf = max(0.0, min(1.0, conf))

    reason_raw = obj.get("reason")
    # A JSON null must not be stringified into the literal text "None".
    reason = "" if reason_raw is None else str(reason_raw).strip()
    reason = reason or "no reason"

    reply = obj.get("suggested_reply")
    if reply is not None:
        reply = str(reply).strip() or None

    return Decision(label, conf, reason, suggested_reply=reply)
122
+
123
+
124
+ # ---- public entry ---------------------------------------------------------
125
+
126
+
127
def triage(
    *,
    subject: str,
    body: str,
    llm: LLM,
    hints: dict[str, object] | None = None,
) -> Decision:
    """Classify a single recruiter message with the supplied language model.

    ``llm`` is any callable taking the prompt string and returning raw text;
    small models (SmolLM2, Qwen-0.5B, etc.) are the target. The returned
    Decision's ``signals`` records the prompt length (``prompt_chars``) and
    the raw output length (``raw_chars``).
    """
    prompt_text = _build_prompt(subject=subject, body=body, hints=hints)
    model_output = llm(prompt_text)

    result = _coerce_decision(_parse_llm_json(model_output))
    result.signals.update(
        prompt_chars=len(prompt_text),
        raw_chars=len(model_output),
    )
    return result
145
+
146
+
147
+ # ---- offline / heuristic fallback LM --------------------------------------
148
+
149
+
150
+ _INTERVIEW_HINTS = (
151
+ "principal",
152
+ "staff",
153
+ "senior",
154
+ "lead",
155
+ "ml engineer",
156
+ "machine learning",
157
+ "ai engineer",
158
+ "applied scientist",
159
+ "research engineer",
160
+ )
161
+
162
+ _REJECT_HINTS = (
163
+ "java",
164
+ ".net",
165
+ "salesforce admin",
166
+ "qa manual",
167
+ "support engineer",
168
+ "tier 1 support",
169
+ )
170
+
171
+ _SPAM_HINTS = (
172
+ "purchase order",
173
+ "lead gen",
174
+ "we have leads",
175
+ "guaranteed candidates",
176
+ "buy emails",
177
+ "outsource your hiring",
178
+ "verified bitcoin",
179
+ "crypto investment",
180
+ )
181
+
182
+ _INFO_HINTS = (
183
+ "more info",
184
+ "let me know if interested",
185
+ "send your resume",
186
+ "open to a chat",
187
+ )
188
+
189
+
190
class HeuristicLLM:
    """Keyword-matching stand-in for a real language model.

    Emits a JSON string shaped like a model response, so the core pipeline
    runs without transformers/torch installed. Good for tests, CI, and
    offline demos.

    Don't ship this to prod. Plug in SmolLM2 or similar for real runs."""

    def __call__(self, prompt: str) -> str:
        """Return a JSON verdict for the first keyword group found in ``prompt``.

        Groups are checked in priority order: spam, reject, interview,
        needs_info. With no match the verdict is ``unsure``.
        """
        haystack = prompt.lower()

        def _mentions(keywords) -> bool:
            # Case-insensitive substring test against the whole prompt.
            for keyword in keywords:
                if keyword in haystack:
                    return True
            return False

        if _mentions(_SPAM_HINTS):
            verdict = {
                "label": "spam",
                "confidence": 0.85,
                "reason": "matched spam keyword",
                "suggested_reply": None,
            }
        elif _mentions(_REJECT_HINTS):
            verdict = {
                "label": "reject",
                "confidence": 0.78,
                "reason": "stack/level mismatch",
                "suggested_reply": "Thanks for reaching out. Not the right fit for me right now.",
            }
        elif _mentions(_INTERVIEW_HINTS):
            verdict = {
                "label": "interview",
                "confidence": 0.72,
                "reason": "title/level looks like a fit",
                "suggested_reply": (
                    "Thanks, this looks interesting. Could you share the role "
                    "level, comp band, and location?"
                ),
            }
        elif _mentions(_INFO_HINTS):
            verdict = {
                "label": "needs_info",
                "confidence": 0.6,
                "reason": "no role title or details given",
                "suggested_reply": (
                    "Happy to chat. Could you share the company, role title, "
                    "and comp band first?"
                ),
            }
        else:
            verdict = {
                "label": "unsure",
                "confidence": 0.3,
                "reason": "no clear signals",
            }
        return json.dumps(verdict)
241
+
242
+
243
+ # ---- batch helpers --------------------------------------------------------
244
+
245
+
246
def triage_batch(
    emails: Iterable[tuple[str, str]],
    *,
    llm: LLM,
    hints: dict[str, object] | None = None,
) -> list[Decision]:
    """Run ``triage`` over each (subject, body) pair, preserving input order.

    The same ``llm`` and ``hints`` are used for every email.
    """
    decisions: list[Decision] = []
    for subject, body in emails:
        decisions.append(triage(subject=subject, body=body, llm=llm, hints=hints))
    return decisions
@@ -0,0 +1,76 @@
1
+ """SmolLM2 (HuggingFaceTB/SmolLM2-360M-Instruct) integration.
2
+
3
+ Lives behind an extra: `pip install "recruitertriage[smollm]"`.
4
+
5
+ The default model is small enough to run on a laptop CPU; for the
6
+ HuggingFace Build Small Hackathon we explicitly target sub-1B
7
+ parameter models. Pass any other instruction-tuned causal LM by name."""
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any
12
+
13
+
14
def make_smollm(
    model_id: str = "HuggingFaceTB/SmolLM2-360M-Instruct",
    *,
    device: str | None = None,
    max_new_tokens: int = 220,
    temperature: float = 0.2,
):
    """Build a callable LM around a HuggingFace causal model.

    Args:
        model_id: Model name passed to ``from_pretrained`` for both the
            tokenizer and the model.
        device: Device to move the model and inputs to. When ``None``,
            ``"cuda"`` is chosen if available, then ``"mps"``, else ``"cpu"``.
        max_new_tokens: Upper bound on generated tokens per call.
        temperature: Sampling temperature. Values ``<= 0`` disable sampling
            (``do_sample=False``); no temperature is then forwarded.

    Returns:
        A function ``(prompt: str) -> str`` that you can pass straight into
        ``recruitertriage.triage(..., llm=...)``.

    Raises:
        RuntimeError: If torch or transformers cannot be imported.
    """
    try:
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
    except Exception as e:  # pragma: no cover - import guard
        raise RuntimeError(
            'recruitertriage[smollm] extras are required for SmolLM2. '
            'Install with: pip install "recruitertriage[smollm]"'
        ) from e

    if device is None:
        if torch.cuda.is_available():
            device = "cuda"
        elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
            device = "mps"
        else:
            device = "cpu"

    tok = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
    model.eval()

    # Generation settings are fixed per factory call, so build them once.
    sampling = temperature > 0
    gen_kwargs: dict[str, Any] = {
        "max_new_tokens": max_new_tokens,
        "do_sample": sampling,
        "pad_token_id": tok.eos_token_id,
    }
    if sampling:
        # temperature only applies to sampling; forwarding it together with
        # do_sample=False (e.g. temperature=0 for greedy decoding) is an
        # inconsistent generation config.
        gen_kwargs["temperature"] = temperature

    def _call(prompt: str) -> str:
        # SmolLM2-Instruct expects a chat template; the prompt becomes
        # a single user message. We deliberately keep the system message
        # baked into `prompt` so callers can swap models without breaking
        # the template.
        messages = [{"role": "user", "content": prompt}]
        rendered = tok.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs: dict[str, Any] = tok(rendered, return_tensors="pt").to(device)
        with torch.no_grad():
            out = model.generate(**inputs, **gen_kwargs)
        # only decode the newly-generated tokens
        new = out[0, inputs["input_ids"].shape[1]:]
        return tok.decode(new, skip_special_tokens=True).strip()

    return _call
69
+
70
+
71
def torch_no_grad():
    """Return a fresh ``torch.no_grad()`` context manager.

    torch is imported inside the function so that importing this module does
    not itself require torch to be installed."""
    from torch import no_grad  # noqa: PLC0415

    return no_grad()
@@ -0,0 +1,141 @@
1
+ """Tests for recruitertriage core triage logic.
2
+
3
+ These tests don't need transformers/torch — they exercise the
4
+ heuristic LM and the JSON parser directly."""
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+
10
+ import pytest
11
+
12
+ from recruitertriage import Decision, HeuristicLLM, Label, triage
13
+ from recruitertriage.core import _coerce_decision, _parse_llm_json, triage_batch
14
+
15
+
16
+ # ---- _parse_llm_json ------------------------------------------------------
17
+
18
+
19
+ def test_parse_clean_json():
20
+ obj = _parse_llm_json('{"label":"interview","confidence":0.8}')
21
+ assert obj == {"label": "interview", "confidence": 0.8}
22
+
23
+
24
+ def test_parse_json_buried_in_prose():
25
+ raw = 'Sure! Here is the result:\n```json\n{"label":"reject"}\n```\nHope that helps!'
26
+ obj = _parse_llm_json(raw)
27
+ assert obj == {"label": "reject"}
28
+
29
+
30
+ def test_parse_garbage_returns_none():
31
+ assert _parse_llm_json("totally not json") is None
32
+
33
+
34
+ # ---- _coerce_decision -----------------------------------------------------
35
+
36
+
37
+ def test_coerce_unknown_label_falls_back_to_unsure():
38
+ d = _coerce_decision({"label": "yolo", "confidence": 0.9, "reason": "x"})
39
+ assert d.label is Label.unsure
40
+
41
+
42
+ def test_coerce_clamps_confidence():
43
+ d = _coerce_decision({"label": "interview", "confidence": 5.0, "reason": "x"})
44
+ assert d.confidence == 1.0
45
+ d2 = _coerce_decision({"label": "interview", "confidence": -1.0, "reason": "x"})
46
+ assert d2.confidence == 0.0
47
+
48
+
49
+ def test_coerce_none_returns_unsure():
50
+ d = _coerce_decision(None)
51
+ assert d.label is Label.unsure
52
+ assert d.confidence == 0.0
53
+
54
+
55
+ # ---- HeuristicLLM end-to-end ---------------------------------------------
56
+
57
+
58
+ @pytest.mark.parametrize(
59
+ "subject,body,expected",
60
+ [
61
+ # spam
62
+ (
63
+ "We have leads",
64
+ "We sell verified bitcoin investment opportunities to your team.",
65
+ Label.spam,
66
+ ),
67
+ # reject (stack mismatch)
68
+ (
69
+ "Java backend role at BankCorp",
70
+ "Hi, we're hiring a Java developer for our trading platform.",
71
+ Label.reject,
72
+ ),
73
+ # interview (clear fit)
74
+ (
75
+ "Staff ML Engineer at AcmeAI",
76
+ "Looking for a Staff ML Engineer to lead our agents team. "
77
+ "Comp 250-320k base + equity.",
78
+ Label.interview,
79
+ ),
80
+ # needs_info (vague but legit)
81
+ (
82
+ "Quick question",
83
+ "Hey, would you be open to a chat? Send your resume "
84
+ "if interested!",
85
+ Label.needs_info,
86
+ ),
87
+ ],
88
+ )
89
+ def test_heuristic_llm_round_trip(subject: str, body: str, expected: Label):
90
+ d = triage(subject=subject, body=body, llm=HeuristicLLM())
91
+ assert d.label is expected
92
+ assert 0.0 <= d.confidence <= 1.0
93
+ assert d.reason
94
+
95
+
96
+ def test_unsure_when_no_signals():
97
+ d = triage(
98
+ subject="Hello",
99
+ body="Hi, just saying hi.",
100
+ llm=HeuristicLLM(),
101
+ )
102
+ assert d.label is Label.unsure
103
+
104
+
105
+ # ---- BYO-LLM contract -----------------------------------------------------
106
+
107
+
108
+ def test_byo_llm_callable_works():
109
+ def fake_llm(prompt: str) -> str:
110
+ return json.dumps({
111
+ "label": "interview",
112
+ "confidence": 0.91,
113
+ "reason": "test fixture",
114
+ "suggested_reply": "thanks, send details",
115
+ })
116
+
117
+ d = triage(subject="hi", body="hi", llm=fake_llm)
118
+ assert d.label is Label.interview
119
+ assert d.confidence == pytest.approx(0.91)
120
+ assert d.suggested_reply == "thanks, send details"
121
+
122
+
123
+ def test_signals_captured():
124
+ d = triage(subject="x", body="y", llm=HeuristicLLM())
125
+ assert "prompt_chars" in d.signals
126
+ assert "raw_chars" in d.signals
127
+ assert isinstance(d.signals["prompt_chars"], int)
128
+
129
+
130
+ # ---- batch ---------------------------------------------------------------
131
+
132
+
133
+ def test_triage_batch():
134
+ pairs = [
135
+ ("Staff MLE role", "Senior Machine Learning Engineer at AcmeAI"),
136
+ ("Java dev", "We need a Java backend dev"),
137
+ ]
138
+ results = triage_batch(pairs, llm=HeuristicLLM())
139
+ assert len(results) == 2
140
+ assert results[0].label is Label.interview
141
+ assert results[1].label is Label.reject