profgen 0.0.1rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- profgen-0.0.1rc1/.coveragerc +28 -0
- profgen-0.0.1rc1/.github/workflows/ci.yml +145 -0
- profgen-0.0.1rc1/.gitignore +73 -0
- profgen-0.0.1rc1/.pre-commit-config.yaml +29 -0
- profgen-0.0.1rc1/.readthedocs.yml +24 -0
- profgen-0.0.1rc1/AUTHORS.md +3 -0
- profgen-0.0.1rc1/CHANGELOG.md +170 -0
- profgen-0.0.1rc1/CONTRIBUTING.md +31 -0
- profgen-0.0.1rc1/LICENSE.txt +21 -0
- profgen-0.0.1rc1/MANIFEST.in +9 -0
- profgen-0.0.1rc1/Makefile +374 -0
- profgen-0.0.1rc1/PKG-INFO +209 -0
- profgen-0.0.1rc1/README.md +162 -0
- profgen-0.0.1rc1/docs/Makefile +29 -0
- profgen-0.0.1rc1/docs/_static/.gitignore +1 -0
- profgen-0.0.1rc1/docs/authors.md +3 -0
- profgen-0.0.1rc1/docs/changelog.md +3 -0
- profgen-0.0.1rc1/docs/conf.py +147 -0
- profgen-0.0.1rc1/docs/contributing.md +3 -0
- profgen-0.0.1rc1/docs/index.md +33 -0
- profgen-0.0.1rc1/docs/license.md +5 -0
- profgen-0.0.1rc1/docs/readme.md +3 -0
- profgen-0.0.1rc1/docs/userguide.md +463 -0
- profgen-0.0.1rc1/examples/build_example_profile.py +51 -0
- profgen-0.0.1rc1/examples/input_cvs/sample_cv.txt +48 -0
- profgen-0.0.1rc1/examples/smoke_real_path.py +58 -0
- profgen-0.0.1rc1/examples/style-map.example.toml +16 -0
- profgen-0.0.1rc1/pyproject.toml +121 -0
- profgen-0.0.1rc1/setup.cfg +4 -0
- profgen-0.0.1rc1/src/profgen/__init__.py +43 -0
- profgen-0.0.1rc1/src/profgen/__main__.py +6 -0
- profgen-0.0.1rc1/src/profgen/_version.py +24 -0
- profgen-0.0.1rc1/src/profgen/cli.py +143 -0
- profgen-0.0.1rc1/src/profgen/extractors/__init__.py +58 -0
- profgen-0.0.1rc1/src/profgen/extractors/base.py +47 -0
- profgen-0.0.1rc1/src/profgen/extractors/docx.py +43 -0
- profgen-0.0.1rc1/src/profgen/extractors/pdf.py +46 -0
- profgen-0.0.1rc1/src/profgen/extractors/txt.py +26 -0
- profgen-0.0.1rc1/src/profgen/llm/__init__.py +26 -0
- profgen-0.0.1rc1/src/profgen/llm/claude_client.py +361 -0
- profgen-0.0.1rc1/src/profgen/llm/prompts.py +72 -0
- profgen-0.0.1rc1/src/profgen/models/__init__.py +21 -0
- profgen-0.0.1rc1/src/profgen/models/candidate.py +100 -0
- profgen-0.0.1rc1/src/profgen/pipeline.py +273 -0
- profgen-0.0.1rc1/src/profgen/template/__init__.py +37 -0
- profgen-0.0.1rc1/src/profgen/template/word_renderer.py +355 -0
- profgen-0.0.1rc1/src/profgen.egg-info/PKG-INFO +209 -0
- profgen-0.0.1rc1/src/profgen.egg-info/SOURCES.txt +73 -0
- profgen-0.0.1rc1/src/profgen.egg-info/dependency_links.txt +1 -0
- profgen-0.0.1rc1/src/profgen.egg-info/entry_points.txt +3 -0
- profgen-0.0.1rc1/src/profgen.egg-info/requires.txt +25 -0
- profgen-0.0.1rc1/src/profgen.egg-info/top_level.txt +1 -0
- profgen-0.0.1rc1/tests/README.md +11 -0
- profgen-0.0.1rc1/tests/conftest.py +53 -0
- profgen-0.0.1rc1/tests/fixtures/__init__.py +33 -0
- profgen-0.0.1rc1/tests/fixtures/build_corpus.py +32 -0
- profgen-0.0.1rc1/tests/fixtures/builders.py +224 -0
- profgen-0.0.1rc1/tests/fixtures/personas.py +400 -0
- profgen-0.0.1rc1/tests/integration/test_convert_cli.py +112 -0
- profgen-0.0.1rc1/tests/integration/test_extractors.py +83 -0
- profgen-0.0.1rc1/tests/integration/test_fixtures.py +102 -0
- profgen-0.0.1rc1/tests/integration/test_grounding_corpus.py +105 -0
- profgen-0.0.1rc1/tests/integration/test_layout.py +12 -0
- profgen-0.0.1rc1/tests/integration/test_pipeline_run.py +233 -0
- profgen-0.0.1rc1/tests/integration/test_rendering_corpus.py +184 -0
- profgen-0.0.1rc1/tests/integration/test_structuring_corpus.py +161 -0
- profgen-0.0.1rc1/tests/unit/test_candidate.py +117 -0
- profgen-0.0.1rc1/tests/unit/test_cli.py +64 -0
- profgen-0.0.1rc1/tests/unit/test_extractor_dispatch.py +86 -0
- profgen-0.0.1rc1/tests/unit/test_grounding.py +247 -0
- profgen-0.0.1rc1/tests/unit/test_import.py +7 -0
- profgen-0.0.1rc1/tests/unit/test_pipeline_paths.py +36 -0
- profgen-0.0.1rc1/tests/unit/test_structuring.py +175 -0
- profgen-0.0.1rc1/tests/unit/test_style_map.py +64 -0
- profgen-0.0.1rc1/tox.ini +102 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# .coveragerc to control coverage.py
|
|
2
|
+
[run]
|
|
3
|
+
branch = True
|
|
4
|
+
source = profgen
|
|
5
|
+
# omit = bad_file.py
|
|
6
|
+
|
|
7
|
+
[paths]
|
|
8
|
+
source =
|
|
9
|
+
src/
|
|
10
|
+
*/site-packages/
|
|
11
|
+
|
|
12
|
+
[report]
|
|
13
|
+
# Regexes for lines to exclude from consideration
|
|
14
|
+
exclude_lines =
|
|
15
|
+
# Have to re-enable the standard pragma
|
|
16
|
+
pragma: no cover
|
|
17
|
+
|
|
18
|
+
# Don't complain about missing debug-only code:
|
|
19
|
+
def __repr__
|
|
20
|
+
if self\.debug
|
|
21
|
+
|
|
22
|
+
# Don't complain if tests don't hit defensive assertion code:
|
|
23
|
+
raise AssertionError
|
|
24
|
+
raise NotImplementedError
|
|
25
|
+
|
|
26
|
+
# Don't complain if non-runnable code isn't run:
|
|
27
|
+
if 0:
|
|
28
|
+
if __name__ == .__main__.:
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# GitHub Actions configuration **EXAMPLE**,
|
|
2
|
+
# MODIFY IT ACCORDING TO YOUR NEEDS!
|
|
3
|
+
# Reference: https://docs.github.com/en/actions
|
|
4
|
+
|
|
5
|
+
name: tests
|
|
6
|
+
|
|
7
|
+
on:
|
|
8
|
+
push:
|
|
9
|
+
# Avoid using all the resources/limits available by checking only
|
|
10
|
+
# relevant branches and tags. Other branches can be checked via PRs.
|
|
11
|
+
branches: [main]
|
|
12
|
+
tags: ['v[0-9]*', '[0-9]+.[0-9]+*'] # Match tags that resemble a version
|
|
13
|
+
pull_request: # Run in every PR
|
|
14
|
+
workflow_dispatch: # Allow manually triggering the workflow
|
|
15
|
+
schedule:
|
|
16
|
+
# Run roughly every 15 days at 00:00 UTC
|
|
17
|
+
# (useful to check if updates on dependencies break the package)
|
|
18
|
+
- cron: '0 0 1,16 * *'
|
|
19
|
+
|
|
20
|
+
permissions:
|
|
21
|
+
contents: read
|
|
22
|
+
|
|
23
|
+
concurrency:
|
|
24
|
+
group: >-
|
|
25
|
+
${{ github.workflow }}-${{ github.ref_type }}-
|
|
26
|
+
${{ github.event.pull_request.number || github.sha }}
|
|
27
|
+
cancel-in-progress: true
|
|
28
|
+
|
|
29
|
+
jobs:
|
|
30
|
+
prepare:
|
|
31
|
+
runs-on: ubuntu-latest
|
|
32
|
+
outputs:
|
|
33
|
+
wheel-distribution: ${{ steps.wheel-distribution.outputs.path }}
|
|
34
|
+
steps:
|
|
35
|
+
- uses: actions/checkout@v4
|
|
36
|
+
with: {fetch-depth: 0} # deep clone for setuptools-scm
|
|
37
|
+
- uses: actions/setup-python@v5
|
|
38
|
+
id: setup-python
|
|
39
|
+
with: {python-version: "3.13"}
|
|
40
|
+
- name: Run static analysis and format checkers
|
|
41
|
+
run: pipx run pre-commit run --all-files --show-diff-on-failure
|
|
42
|
+
- name: Type-check with mypy --strict
|
|
43
|
+
run: >-
|
|
44
|
+
pipx run --python '${{ steps.setup-python.outputs.python-path }}'
|
|
45
|
+
tox -e typecheck
|
|
46
|
+
- name: Build package distribution files
|
|
47
|
+
run: >-
|
|
48
|
+
pipx run --python '${{ steps.setup-python.outputs.python-path }}'
|
|
49
|
+
tox -e clean,build
|
|
50
|
+
- name: Record the path of wheel distribution
|
|
51
|
+
id: wheel-distribution
|
|
52
|
+
run: echo "path=$(ls dist/*.whl)" >> $GITHUB_OUTPUT
|
|
53
|
+
- name: Store the distribution files for use in other stages
|
|
54
|
+
# `tests` and `publish` will use the same pre-built distributions,
|
|
55
|
+
# so we make sure to release the exact same package that was tested
|
|
56
|
+
uses: actions/upload-artifact@v4
|
|
57
|
+
with:
|
|
58
|
+
name: python-distribution-files
|
|
59
|
+
path: dist/
|
|
60
|
+
retention-days: 1
|
|
61
|
+
|
|
62
|
+
test:
|
|
63
|
+
needs: prepare
|
|
64
|
+
strategy:
|
|
65
|
+
matrix:
|
|
66
|
+
python:
|
|
67
|
+
- "3.13" # minimum supported (see requires-python in pyproject.toml)
|
|
68
|
+
platform:
|
|
69
|
+
- ubuntu-latest
|
|
70
|
+
- macos-latest
|
|
71
|
+
- windows-latest
|
|
72
|
+
runs-on: ${{ matrix.platform }}
|
|
73
|
+
steps:
|
|
74
|
+
- uses: actions/checkout@v4
|
|
75
|
+
- uses: actions/setup-python@v5
|
|
76
|
+
id: setup-python
|
|
77
|
+
with:
|
|
78
|
+
python-version: ${{ matrix.python }}
|
|
79
|
+
- name: Retrieve pre-built distribution files
|
|
80
|
+
uses: actions/download-artifact@v4
|
|
81
|
+
with: {name: python-distribution-files, path: dist/}
|
|
82
|
+
- name: Run tests
|
|
83
|
+
run: >-
|
|
84
|
+
pipx run --python '${{ steps.setup-python.outputs.python-path }}'
|
|
85
|
+
tox --installpkg '${{ needs.prepare.outputs.wheel-distribution }}'
|
|
86
|
+
-- -rFEx --durations 10 --color yes # pytest args
|
|
87
|
+
- name: Generate coverage report
|
|
88
|
+
run: pipx run coverage lcov -o coverage.lcov
|
|
89
|
+
- name: Upload partial coverage report
|
|
90
|
+
continue-on-error: true # Coveralls not configured for this repo; don't fail CI on it
|
|
91
|
+
uses: coverallsapp/github-action@master
|
|
92
|
+
with:
|
|
93
|
+
path-to-lcov: coverage.lcov
|
|
94
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
95
|
+
flag-name: ${{ matrix.platform }} - py${{ matrix.python }}
|
|
96
|
+
parallel: true
|
|
97
|
+
|
|
98
|
+
finalize:
|
|
99
|
+
needs: test
|
|
100
|
+
runs-on: ubuntu-latest
|
|
101
|
+
steps:
|
|
102
|
+
- name: Finalize coverage report
|
|
103
|
+
continue-on-error: true # Coveralls not configured for this repo; don't fail CI on it
|
|
104
|
+
uses: coverallsapp/github-action@master
|
|
105
|
+
with:
|
|
106
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
107
|
+
parallel-finished: true
|
|
108
|
+
|
|
109
|
+
# Final version tags (e.g. v0.0.1) -> real PyPI. Pre-release tags (…rc…) are
|
|
110
|
+
# excluded here and routed to TestPyPI by the publish-testpypi job below.
|
|
111
|
+
publish:
|
|
112
|
+
needs: finalize
|
|
113
|
+
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && !contains(github.ref, 'rc') }}
|
|
114
|
+
runs-on: ubuntu-latest
|
|
115
|
+
# Trusted Publishing (OIDC) — no API token/secret. The PyPI publisher is
|
|
116
|
+
# registered against this repo's `ci.yml` workflow and the `pypi` environment.
|
|
117
|
+
environment: pypi
|
|
118
|
+
permissions:
|
|
119
|
+
id-token: write # required for the OIDC token exchange with PyPI
|
|
120
|
+
contents: read
|
|
121
|
+
steps:
|
|
122
|
+
- name: Retrieve pre-built distribution files
|
|
123
|
+
uses: actions/download-artifact@v4
|
|
124
|
+
with: {name: python-distribution-files, path: dist/}
|
|
125
|
+
- name: Publish to PyPI (Trusted Publishing)
|
|
126
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
127
|
+
|
|
128
|
+
# Pre-release tags (e.g. v0.0.1rc1) -> TestPyPI dry-run. Same OIDC mechanism,
|
|
129
|
+
# registered against this repo's `ci.yml` workflow and the `testpypi` environment.
|
|
130
|
+
publish-testpypi:
|
|
131
|
+
needs: finalize
|
|
132
|
+
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && contains(github.ref, 'rc') }}
|
|
133
|
+
runs-on: ubuntu-latest
|
|
134
|
+
environment: testpypi
|
|
135
|
+
permissions:
|
|
136
|
+
id-token: write
|
|
137
|
+
contents: read
|
|
138
|
+
steps:
|
|
139
|
+
- name: Retrieve pre-built distribution files
|
|
140
|
+
uses: actions/download-artifact@v4
|
|
141
|
+
with: {name: python-distribution-files, path: dist/}
|
|
142
|
+
- name: Publish to TestPyPI (Trusted Publishing, dry-run)
|
|
143
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
144
|
+
with:
|
|
145
|
+
repository-url: https://test.pypi.org/legacy/
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# Real candidate CVs — contain PII (names, emails, phone numbers).
|
|
2
|
+
# NEVER commit these. Anonymised, generated fixtures live elsewhere.
|
|
3
|
+
tests/samples/
|
|
4
|
+
|
|
5
|
+
# Local preview output of the synthetic CV corpus (regenerable).
|
|
6
|
+
.fixture_preview/
|
|
7
|
+
|
|
8
|
+
# Generated example profiles (regenerable via examples/build_example_profile.py).
|
|
9
|
+
examples/output_profiles/
|
|
10
|
+
|
|
11
|
+
# Temporary and binary files
|
|
12
|
+
*~
|
|
13
|
+
*.py[cod]
|
|
14
|
+
*.so
|
|
15
|
+
*.cfg
|
|
16
|
+
!.isort.cfg
|
|
17
|
+
!setup.cfg
|
|
18
|
+
*.orig
|
|
19
|
+
*.log
|
|
20
|
+
*.pot
|
|
21
|
+
__pycache__/*
|
|
22
|
+
.cache/*
|
|
23
|
+
.*.swp
|
|
24
|
+
*/.ipynb_checkpoints/*
|
|
25
|
+
.DS_Store
|
|
26
|
+
|
|
27
|
+
# Project files
|
|
28
|
+
.ropeproject
|
|
29
|
+
.project
|
|
30
|
+
.pydevproject
|
|
31
|
+
.settings
|
|
32
|
+
.idea
|
|
33
|
+
.vscode
|
|
34
|
+
tags
|
|
35
|
+
|
|
36
|
+
# Package files
|
|
37
|
+
*.egg
|
|
38
|
+
*.eggs/
|
|
39
|
+
.installed.cfg
|
|
40
|
+
*.egg-info
|
|
41
|
+
|
|
42
|
+
# Unittest and coverage
|
|
43
|
+
htmlcov/*
|
|
44
|
+
.coverage
|
|
45
|
+
.coverage.*
|
|
46
|
+
.tox
|
|
47
|
+
junit*.xml
|
|
48
|
+
coverage.xml
|
|
49
|
+
.pytest_cache/
|
|
50
|
+
|
|
51
|
+
# Build and docs folder/files
|
|
52
|
+
build/*
|
|
53
|
+
dist/*
|
|
54
|
+
sdist/*
|
|
55
|
+
docs/api/*
|
|
56
|
+
docs/_rst/*
|
|
57
|
+
docs/_build/*
|
|
58
|
+
cover/*
|
|
59
|
+
MANIFEST
|
|
60
|
+
|
|
61
|
+
# Per-project virtualenvs
|
|
62
|
+
.venv*/
|
|
63
|
+
.conda*/
|
|
64
|
+
.python-version
|
|
65
|
+
# Generated by setuptools_scm
|
|
66
|
+
src/profgen/_version.py
|
|
67
|
+
|
|
68
|
+
# Local secrets / API keys — never commit
|
|
69
|
+
.env
|
|
70
|
+
.env.*
|
|
71
|
+
|
|
72
|
+
# Private branded assets for `make profile` (confidential templates + style maps)
|
|
73
|
+
local/
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
ci:
|
|
2
|
+
autoupdate_schedule: quarterly
|
|
3
|
+
|
|
4
|
+
default_language_version:
|
|
5
|
+
python: python3.13
|
|
6
|
+
|
|
7
|
+
repos:
|
|
8
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
9
|
+
rev: v0.6.9 # pin to a tag; update as needed
|
|
10
|
+
hooks:
|
|
11
|
+
- id: ruff
|
|
12
|
+
args: [--fix, --exit-non-zero-on-fix]
|
|
13
|
+
- id: ruff-format
|
|
14
|
+
|
|
15
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
16
|
+
rev: v4.6.0 # pin to a tag; update as needed
|
|
17
|
+
hooks:
|
|
18
|
+
- id: check-added-large-files
|
|
19
|
+
- id: end-of-file-fixer
|
|
20
|
+
- id: trailing-whitespace
|
|
21
|
+
- id: check-merge-conflict
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# If you want Black as well, uncomment this block and keep its rev pinned
|
|
25
|
+
# - repo: https://github.com/psf/black
|
|
26
|
+
# rev: 24.10.0 # example pinned release
|
|
27
|
+
# hooks:
|
|
28
|
+
# - id: black
|
|
29
|
+
# args: ["--line-length=100"]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Read the Docs configuration file
|
|
2
|
+
# https://docs.readthedocs.io/en/stable/config-file/v2.html
|
|
3
|
+
|
|
4
|
+
version: 2
|
|
5
|
+
|
|
6
|
+
build:
|
|
7
|
+
os: ubuntu-22.04
|
|
8
|
+
tools:
|
|
9
|
+
python: "3.13"
|
|
10
|
+
|
|
11
|
+
sphinx:
|
|
12
|
+
configuration: docs/conf.py
|
|
13
|
+
|
|
14
|
+
formats:
|
|
15
|
+
- pdf
|
|
16
|
+
|
|
17
|
+
python:
|
|
18
|
+
install:
|
|
19
|
+
# Install the package with its `docs` extra (single source of truth in
|
|
20
|
+
# pyproject.toml). No docs/requirements.txt.
|
|
21
|
+
- method: pip
|
|
22
|
+
path: .
|
|
23
|
+
extras:
|
|
24
|
+
- docs
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## Unreleased
|
|
4
|
+
|
|
5
|
+
### De-brand & bring-your-own-template
|
|
6
|
+
|
|
7
|
+
- Remove all brand-specific naming from the user-facing surface ahead of the
|
|
8
|
+
public PyPI release: the output is now described as a generic "standardised
|
|
9
|
+
Word profile". The pipeline, the no-invented-facts rule, employer anonymisation
|
|
10
|
+
(`Project N | <domain>`) and the grounding check are unchanged.
|
|
11
|
+
- Render against **five logical roles** (`title`, `date_heading`, `body`,
|
|
12
|
+
`bullet`, `legal`) resolved to paragraph-style names through a *style map*. The
|
|
13
|
+
neutral `DEFAULT_STYLE_MAP` maps them to `Profile Title`, `Profile Date`,
|
|
14
|
+
`Normal`, `List Bullet` and `Profile Legal`.
|
|
15
|
+
- **Bring your own template:** `convert --template my.docx --style-map my.toml`
|
|
16
|
+
renders against your branded document, with the TOML file mapping the five roles
|
|
17
|
+
to that document's own style names. `load_style_map` loads it; partial maps fall
|
|
18
|
+
back to the defaults. This lets a private or corporate template be applied
|
|
19
|
+
without it living in the package.
|
|
20
|
+
- `convert`'s default output is now `<source-stem>_profile.docx`.
|
|
21
|
+
- Add a `make profile CV=cv.pdf [OUT=..] [OFFLINE=1]` convenience target that
|
|
22
|
+
renders a branded profile using a gitignored `local/template.docx` and
|
|
23
|
+
`local/style-map.toml`; copy `examples/style-map.example.toml` to
|
|
24
|
+
`local/style-map.toml` and edit it. `local/` and `.env` are gitignored, keeping
|
|
25
|
+
confidential templates and API keys out of the repository.
|
|
26
|
+
|
|
27
|
+
### Increment 0 — Tooling & dependencies foundation
|
|
28
|
+
|
|
29
|
+
- Add runtime dependencies: `pydantic`, `python-docx`, `pdfplumber`, `anthropic`.
|
|
30
|
+
- Add optional `pdf-fast` extra (`pymupdf`) for the swappable PDF backend.
|
|
31
|
+
- Add `reportlab` and `mypy` to the `dev` extra; fold `docs` + `pdf-fast` in.
|
|
32
|
+
- Wire `mypy --strict` (scoped to `src/`) into the tooling.
|
|
33
|
+
- Replace the single-command CLI with a Click group exposing `convert` and
|
|
34
|
+
`make-template` (stubs at this stage); add the `cv-formatter` entry-point alias.
|
|
35
|
+
- Remove the scaffold's placeholder `api.py` (superseded by the forthcoming
|
|
36
|
+
`pipeline.py`).
|
|
37
|
+
- Add an offline guard (socket-blocking autouse fixture) and CLI smoke tests;
|
|
38
|
+
enforce `mypy --strict` in CI via a `typecheck` tox env.
|
|
39
|
+
- Add a synthetic, fully-fictitious CV fixture corpus (`tests/fixtures/`) with a
|
|
40
|
+
`cv_corpus` test fixture; record the profile-format decisions in the spec.
|
|
41
|
+
|
|
42
|
+
### Increment 1 — The Candidate data contract
|
|
43
|
+
|
|
44
|
+
- Add the Pydantic v2 `Candidate` model and `EmploymentEntry` / `ProjectEntry` /
|
|
45
|
+
`EducationEntry` sub-models — the single contract shared by the whole pipeline.
|
|
46
|
+
Every field defaults to `"Not stated"` / `[]`; nothing is required.
|
|
47
|
+
- Export `Candidate` (and sub-models, `NOT_STATED`, `is_not_stated`) from the
|
|
48
|
+
package root; enable the `pydantic.mypy` plugin.
|
|
49
|
+
|
|
50
|
+
### Increment 2 — Extractors (stage 2, fully offline)
|
|
51
|
+
|
|
52
|
+
- Add `profgen.extractors`: `ExtractedDocument` + `Extractor` protocol, plain-text
|
|
53
|
+
(UTF-8 with CP1252 fallback), `.docx` (python-docx, paragraphs + tables in
|
|
54
|
+
document order) and `.pdf` (pdfplumber default, pymupdf swappable via the
|
|
55
|
+
`pdf-fast` extra) backends, with extension-based `extract()` dispatch.
|
|
56
|
+
- Extractors do no interpretation — verbatim text only; `normalized_text`
|
|
57
|
+
collapses whitespace for grounding-friendly matching.
|
|
58
|
+
|
|
59
|
+
### Increment 3 — LLM structuring layer (stage 3)
|
|
60
|
+
|
|
61
|
+
- Add `profgen.llm`: the `StructuringClient` protocol — the single interface that
|
|
62
|
+
turns an `ExtractedDocument` into a typed `Candidate` — with two implementations
|
|
63
|
+
behind it.
|
|
64
|
+
- Add `ClaudeStructuringClient`, the production client. It calls Claude
|
|
65
|
+
(`claude-sonnet-4-6`) with forced, strict tool use against one tool whose schema
|
|
66
|
+
is the `Candidate` model, `temperature=0` and `max_tokens=8192`. Constructing it
|
|
67
|
+
touches no network or environment; the Anthropic SDK client is built lazily on
|
|
68
|
+
first use (or injected for testing). It is wired but, by design, never exercised
|
|
69
|
+
in CI.
|
|
70
|
+
- Add `HeuristicStructuringClient`, a deterministic, network-free parser that reads
|
|
71
|
+
the synthetic corpus by section headers. It is the `--offline` and test path and
|
|
72
|
+
invents nothing — absent fields stay at the `"Not stated"` / `[]` defaults.
|
|
73
|
+
- Add `StructuringError` for transport, API and malformed-response failures, and
|
|
74
|
+
the `SYSTEM_PROMPT` / `TOOL_NAME` prompt constants. The system prompt encodes the
|
|
75
|
+
no-invented-facts rule, the `"Not stated"` sentinel, the no-derived-fields rule,
|
|
76
|
+
British English, and the semiconductor skill bucketing.
|
|
77
|
+
|
|
78
|
+
### Increment 4 — Grounding check & review report (stages 4 & 6)
|
|
79
|
+
|
|
80
|
+
- Add `profgen.pipeline` with four pure, deterministic, LLM-independent functions,
|
|
81
|
+
all re-exported from the package root.
|
|
82
|
+
- `check_grounding` — the anti-hallucination guard. For each groundable entity
|
|
83
|
+
(company, tool, certification, institution, project name, project domain) it
|
|
84
|
+
confirms the value appears in the verbatim stage-2 text via a
|
|
85
|
+
whitespace-collapsed, case-folded substring match, and returns a British-English
|
|
86
|
+
note for every entity that does not. Honestly absent (`"Not stated"` / `[]`)
|
|
87
|
+
values are never flagged; company is verified for grounding even though it is
|
|
88
|
+
never rendered (SPEC §14.1).
|
|
89
|
+
- `collect_missing_information` — lists every scalar left at the `"Not stated"`
|
|
90
|
+
sentinel and every empty list, in model field order.
|
|
91
|
+
- `annotate_candidate` — the composition seam: runs both of the above and returns
|
|
92
|
+
a copy of the `Candidate` with the pipeline-populated `source_confidence_notes`
|
|
93
|
+
and `missing_information` fields set, without mutating the input.
|
|
94
|
+
- `render_review_report` — renders the deterministic `*.review.md` body (missing
|
|
95
|
+
information / assumptions made / items to verify before customer submission) from
|
|
96
|
+
an already-annotated candidate, in British English. (End-to-end `run_pipeline`
|
|
97
|
+
orchestration remains a later increment.)
|
|
98
|
+
|
|
99
|
+
### Increment 5 — Word rendering & starter template (stage 5)
|
|
100
|
+
|
|
101
|
+
- Add `profgen.template.word_renderer`: the deterministic, fully-offline Word
|
|
102
|
+
renderer. `render_profile(candidate, *, template_path=None, style_map=None)`
|
|
103
|
+
builds the `python-docx` `Document`; `write_profile(candidate, output_path, *,
|
|
104
|
+
template_path=None, style_map=None)` renders and saves it.
|
|
105
|
+
- Use the **style-donor** approach (SPEC §8): keep the donor's header, footer and
|
|
106
|
+
fonts, and write variable-length content programmatically against five logical
|
|
107
|
+
roles (`title`, `date_heading`, `body`, `bullet`, `legal`, exposed as the
|
|
108
|
+
`ROLE_TITLE/ROLE_DATE/ROLE_BODY/ROLE_BULLET/ROLE_LEGAL` constants) — no
|
|
109
|
+
Jinja-style loops in the `.docx`. A *style map* resolves each role to a concrete
|
|
110
|
+
paragraph-style name; the neutral `DEFAULT_STYLE_MAP` maps them to
|
|
111
|
+
`Profile Title`, `Profile Date`, `Normal`, `List Bullet` and `Profile Legal`. A
|
|
112
|
+
house style is applied by supplying a donor plus a style map that points the
|
|
113
|
+
roles at that document's own style names; an absent style falls back to the
|
|
114
|
+
default.
|
|
115
|
+
- Add `make_template(path)`, which writes a starter `.docx` carrying the default
|
|
116
|
+
named styles, and wire the `make-template` CLI command to it (prints
|
|
117
|
+
`Wrote starter template to <path>`).
|
|
118
|
+
- Enforce **employer anonymisation** (§14.1): company names are never rendered;
|
|
119
|
+
experience is shown as `Project N | <domain>`. Enforce **no derived fields**
|
|
120
|
+
(§14.3): the skills table's "Years Experience" column renders `"Not stated"`
|
|
121
|
+
for every row. Honestly render `"Not stated"` where the source was silent.
|
|
122
|
+
- All rendered text is British English. (`convert` orchestration remains a stub
|
|
123
|
+
until Increment 6.)
|
|
124
|
+
|
|
125
|
+
### Increment 6 — End-to-end `convert` orchestration (offline path)
|
|
126
|
+
|
|
127
|
+
- Add `profgen.run_pipeline(source, output, *, offline=False, template_path=None,
|
|
128
|
+
style_map=None, client=None) -> PipelineResult` — the full orchestration
|
|
129
|
+
(SPEC §3): extract →
|
|
130
|
+
structure → annotate/ground → render `.docx` → write the sibling `*.review.md`.
|
|
131
|
+
An injected `client` wins; otherwise `--offline` selects the deterministic
|
|
132
|
+
`HeuristicStructuringClient` and the default path selects `ClaudeStructuringClient`
|
|
133
|
+
(`ANTHROPIC_API_KEY` required, reached only on first `structure`). The offline /
|
|
134
|
+
injected path is fully deterministic and performs no network access.
|
|
135
|
+
- Add the frozen `PipelineResult` dataclass (`candidate`, `profile_path`,
|
|
136
|
+
`review_path`, `needs_verification`); `run_pipeline` and `PipelineResult` are both re-exported from the package root.
|
|
137
|
+
`needs_verification` is `True` when any entity was ungrounded or any field was
|
|
138
|
+
left unstated.
|
|
139
|
+
- Wire the `convert` CLI command to `run_pipeline`. It writes the profile to
|
|
140
|
+
`--output` (default `<source-stem>_profile.docx` in the CWD) plus the sibling
|
|
141
|
+
`*.review.md` (`out.docx` → `out.review.md`), and prints a manual-verification
|
|
142
|
+
warning to stderr when `needs_verification` is set. Missing `ANTHROPIC_API_KEY`
|
|
143
|
+
or an online failure is reported as a friendly message advising `--offline`.
|
|
144
|
+
|
|
145
|
+
### Increment 7 — Examples, real-path smoke, README & acceptance hardening
|
|
146
|
+
|
|
147
|
+
- Add `examples/build_example_profile.py` — the runnable offline example (SPEC §12
|
|
148
|
+
criterion 4). It drives `run_pipeline` on the bundled
|
|
149
|
+
`examples/input_cvs/sample_cv.txt` with `offline=True`, needs **no API key and
|
|
150
|
+
makes no network call**, and writes `sample_profile.docx` plus its sibling
|
|
151
|
+
`sample_profile.review.md` into the gitignored `examples/output_profiles/`. The
|
|
152
|
+
run is deterministic.
|
|
153
|
+
- Add `examples/smoke_real_path.py` — the only place that exercises the real Claude
|
|
154
|
+
path (`offline=False`), double-guarded behind `PROFGEN_REAL_SMOKE=1` **and**
|
|
155
|
+
`ANTHROPIC_API_KEY`. When either is absent it prints a one-line message and exits
|
|
156
|
+
`0` without touching the network, so it is inert in the offline suite and **never
|
|
157
|
+
runs in CI** by design.
|
|
158
|
+
- Rewrite the placeholder PyScaffold `README.md` into a real description: install,
|
|
159
|
+
the `convert` / `make-template` usage (incl. `--offline` and the `cv-formatter`
|
|
160
|
+
alias), and the no-invented-facts guarantee.
|
|
161
|
+
- Add the Sphinx User Guide and finalise the docs sweep; confirm the API reference
|
|
162
|
+
(autodoc) still builds cleanly.
|
|
163
|
+
- Final acceptance sweep against SPEC §12: all eight criteria hold; `ruff`,
|
|
164
|
+
`mypy --strict` (scoped to `src/`) and offline `pytest` are green. The SPEC §13
|
|
165
|
+
open seams (OCR for scanned PDFs; a fixed-layout `docxtpl` placeholder renderer)
|
|
166
|
+
are deliberately left unbuilt.
|
|
167
|
+
|
|
168
|
+
<!-- No final tagged release yet. The project is pre-release; setuptools_scm derives
|
|
169
|
+
the version from git (currently 0.0.x.devN). The first real release will be
|
|
170
|
+
tagged once the pipeline is functional. -->
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
Contributions are welcome! Please follow these guidelines:
|
|
4
|
+
|
|
5
|
+
## Development Setup
|
|
6
|
+
|
|
7
|
+
1. Clone the repository
|
|
8
|
+
2. Install development dependencies: `make dev`
|
|
9
|
+
3. Install pre-commit hooks: `pre-commit install`
|
|
10
|
+
|
|
11
|
+
## Code Style
|
|
12
|
+
|
|
13
|
+
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
|
|
14
|
+
Run `make lint` to check and `make format` to auto-format.
|
|
15
|
+
|
|
16
|
+
## Testing
|
|
17
|
+
|
|
18
|
+
Run tests with `make test` or `pytest` directly.
|
|
19
|
+
|
|
20
|
+
## Pull Requests
|
|
21
|
+
|
|
22
|
+
1. Fork the repository
|
|
23
|
+
2. Create a feature branch
|
|
24
|
+
3. Make your changes
|
|
25
|
+
4. Run tests and linting
|
|
26
|
+
5. Submit a pull request
|
|
27
|
+
|
|
28
|
+
## Issue Reporting
|
|
29
|
+
|
|
30
|
+
Please use the issue tracker to report bugs or request features.
|
|
31
|
+
Include as much detail as possible.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kevin Steptoe
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Keep confidential, repo-private files out of the published sdist.
|
|
2
|
+
# setuptools_scm includes every git-tracked file by default, so the private
|
|
3
|
+
# design docs and agent definitions are explicitly excluded here.
|
|
4
|
+
exclude cv_formatter_SPEC.md
|
|
5
|
+
exclude cv_formatter_IMPLEMENTATION_PLAN.md
|
|
6
|
+
exclude cv_formatter_ROADMAP.md
|
|
7
|
+
exclude CLAUDE.md
|
|
8
|
+
prune .claude
|
|
9
|
+
prune tests/samples
|