profgen 0.0.1rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. profgen-0.0.1rc1/.coveragerc +28 -0
  2. profgen-0.0.1rc1/.github/workflows/ci.yml +145 -0
  3. profgen-0.0.1rc1/.gitignore +73 -0
  4. profgen-0.0.1rc1/.pre-commit-config.yaml +29 -0
  5. profgen-0.0.1rc1/.readthedocs.yml +24 -0
  6. profgen-0.0.1rc1/AUTHORS.md +3 -0
  7. profgen-0.0.1rc1/CHANGELOG.md +170 -0
  8. profgen-0.0.1rc1/CONTRIBUTING.md +31 -0
  9. profgen-0.0.1rc1/LICENSE.txt +21 -0
  10. profgen-0.0.1rc1/MANIFEST.in +9 -0
  11. profgen-0.0.1rc1/Makefile +374 -0
  12. profgen-0.0.1rc1/PKG-INFO +209 -0
  13. profgen-0.0.1rc1/README.md +162 -0
  14. profgen-0.0.1rc1/docs/Makefile +29 -0
  15. profgen-0.0.1rc1/docs/_static/.gitignore +1 -0
  16. profgen-0.0.1rc1/docs/authors.md +3 -0
  17. profgen-0.0.1rc1/docs/changelog.md +3 -0
  18. profgen-0.0.1rc1/docs/conf.py +147 -0
  19. profgen-0.0.1rc1/docs/contributing.md +3 -0
  20. profgen-0.0.1rc1/docs/index.md +33 -0
  21. profgen-0.0.1rc1/docs/license.md +5 -0
  22. profgen-0.0.1rc1/docs/readme.md +3 -0
  23. profgen-0.0.1rc1/docs/userguide.md +463 -0
  24. profgen-0.0.1rc1/examples/build_example_profile.py +51 -0
  25. profgen-0.0.1rc1/examples/input_cvs/sample_cv.txt +48 -0
  26. profgen-0.0.1rc1/examples/smoke_real_path.py +58 -0
  27. profgen-0.0.1rc1/examples/style-map.example.toml +16 -0
  28. profgen-0.0.1rc1/pyproject.toml +121 -0
  29. profgen-0.0.1rc1/setup.cfg +4 -0
  30. profgen-0.0.1rc1/src/profgen/__init__.py +43 -0
  31. profgen-0.0.1rc1/src/profgen/__main__.py +6 -0
  32. profgen-0.0.1rc1/src/profgen/_version.py +24 -0
  33. profgen-0.0.1rc1/src/profgen/cli.py +143 -0
  34. profgen-0.0.1rc1/src/profgen/extractors/__init__.py +58 -0
  35. profgen-0.0.1rc1/src/profgen/extractors/base.py +47 -0
  36. profgen-0.0.1rc1/src/profgen/extractors/docx.py +43 -0
  37. profgen-0.0.1rc1/src/profgen/extractors/pdf.py +46 -0
  38. profgen-0.0.1rc1/src/profgen/extractors/txt.py +26 -0
  39. profgen-0.0.1rc1/src/profgen/llm/__init__.py +26 -0
  40. profgen-0.0.1rc1/src/profgen/llm/claude_client.py +361 -0
  41. profgen-0.0.1rc1/src/profgen/llm/prompts.py +72 -0
  42. profgen-0.0.1rc1/src/profgen/models/__init__.py +21 -0
  43. profgen-0.0.1rc1/src/profgen/models/candidate.py +100 -0
  44. profgen-0.0.1rc1/src/profgen/pipeline.py +273 -0
  45. profgen-0.0.1rc1/src/profgen/template/__init__.py +37 -0
  46. profgen-0.0.1rc1/src/profgen/template/word_renderer.py +355 -0
  47. profgen-0.0.1rc1/src/profgen.egg-info/PKG-INFO +209 -0
  48. profgen-0.0.1rc1/src/profgen.egg-info/SOURCES.txt +73 -0
  49. profgen-0.0.1rc1/src/profgen.egg-info/dependency_links.txt +1 -0
  50. profgen-0.0.1rc1/src/profgen.egg-info/entry_points.txt +3 -0
  51. profgen-0.0.1rc1/src/profgen.egg-info/requires.txt +25 -0
  52. profgen-0.0.1rc1/src/profgen.egg-info/top_level.txt +1 -0
  53. profgen-0.0.1rc1/tests/README.md +11 -0
  54. profgen-0.0.1rc1/tests/conftest.py +53 -0
  55. profgen-0.0.1rc1/tests/fixtures/__init__.py +33 -0
  56. profgen-0.0.1rc1/tests/fixtures/build_corpus.py +32 -0
  57. profgen-0.0.1rc1/tests/fixtures/builders.py +224 -0
  58. profgen-0.0.1rc1/tests/fixtures/personas.py +400 -0
  59. profgen-0.0.1rc1/tests/integration/test_convert_cli.py +112 -0
  60. profgen-0.0.1rc1/tests/integration/test_extractors.py +83 -0
  61. profgen-0.0.1rc1/tests/integration/test_fixtures.py +102 -0
  62. profgen-0.0.1rc1/tests/integration/test_grounding_corpus.py +105 -0
  63. profgen-0.0.1rc1/tests/integration/test_layout.py +12 -0
  64. profgen-0.0.1rc1/tests/integration/test_pipeline_run.py +233 -0
  65. profgen-0.0.1rc1/tests/integration/test_rendering_corpus.py +184 -0
  66. profgen-0.0.1rc1/tests/integration/test_structuring_corpus.py +161 -0
  67. profgen-0.0.1rc1/tests/unit/test_candidate.py +117 -0
  68. profgen-0.0.1rc1/tests/unit/test_cli.py +64 -0
  69. profgen-0.0.1rc1/tests/unit/test_extractor_dispatch.py +86 -0
  70. profgen-0.0.1rc1/tests/unit/test_grounding.py +247 -0
  71. profgen-0.0.1rc1/tests/unit/test_import.py +7 -0
  72. profgen-0.0.1rc1/tests/unit/test_pipeline_paths.py +36 -0
  73. profgen-0.0.1rc1/tests/unit/test_structuring.py +175 -0
  74. profgen-0.0.1rc1/tests/unit/test_style_map.py +64 -0
  75. profgen-0.0.1rc1/tox.ini +102 -0
@@ -0,0 +1,28 @@
1
+ # .coveragerc to control coverage.py
2
+ [run]
3
+ branch = True
4
+ source = profgen
5
+ # omit = bad_file.py
6
+
7
+ [paths]
8
+ source =
9
+ src/
10
+ */site-packages/
11
+
12
+ [report]
13
+ # Regexes for lines to exclude from consideration
14
+ exclude_lines =
15
+ # Have to re-enable the standard pragma
16
+ pragma: no cover
17
+
18
+ # Don't complain about missing debug-only code:
19
+ def __repr__
20
+ if self\.debug
21
+
22
+ # Don't complain if tests don't hit defensive assertion code:
23
+ raise AssertionError
24
+ raise NotImplementedError
25
+
26
+ # Don't complain if non-runnable code isn't run:
27
+ if 0:
28
+ if __name__ == .__main__.:
@@ -0,0 +1,145 @@
1
+ # GitHub Actions CI workflow: lint, type-check and build once, test the built wheel
2
+ # on Linux, macOS and Windows, then publish tagged versions via Trusted Publishing.
3
+ # Reference: https://docs.github.com/en/actions
4
+
5
+ name: tests
6
+
7
+ on:
8
+ push:
9
+ # Avoid using all the resources/limits available by checking only
10
+ # relevant branches and tags. Other branches can be checked via PRs.
11
+ branches: [main]
12
+ tags: ['v[0-9]*', '[0-9]+.[0-9]+*'] # Match tags that resemble a version
13
+ pull_request: # Run in every PR
14
+ workflow_dispatch: # Allow manually triggering the workflow
15
+ schedule:
16
+ # Run roughly every 15 days at 00:00 UTC
17
+ # (useful to check if updates on dependencies break the package)
18
+ - cron: '0 0 1,16 * *'
19
+
20
+ permissions:
21
+ contents: read
22
+
23
+ concurrency:
24
+ group: >-
25
+ ${{ github.workflow }}-${{ github.ref_type }}-
26
+ ${{ github.event.pull_request.number || github.sha }}
27
+ cancel-in-progress: true
28
+
29
+ jobs:
30
+ prepare:
31
+ runs-on: ubuntu-latest
32
+ outputs:
33
+ wheel-distribution: ${{ steps.wheel-distribution.outputs.path }}
34
+ steps:
35
+ - uses: actions/checkout@v4
36
+ with: {fetch-depth: 0} # deep clone for setuptools-scm
37
+ - uses: actions/setup-python@v5
38
+ id: setup-python
39
+ with: {python-version: "3.13"}
40
+ - name: Run static analysis and format checkers
41
+ run: pipx run pre-commit run --all-files --show-diff-on-failure
42
+ - name: Type-check with mypy --strict
43
+ run: >-
44
+ pipx run --python '${{ steps.setup-python.outputs.python-path }}'
45
+ tox -e typecheck
46
+ - name: Build package distribution files
47
+ run: >-
48
+ pipx run --python '${{ steps.setup-python.outputs.python-path }}'
49
+ tox -e clean,build
50
+ - name: Record the path of wheel distribution
51
+ id: wheel-distribution
52
+ run: echo "path=$(ls dist/*.whl)" >> $GITHUB_OUTPUT
53
+ - name: Store the distribution files for use in other stages
54
+ # `tests` and `publish` will use the same pre-built distributions,
55
+ # so we make sure to release the exact same package that was tested
56
+ uses: actions/upload-artifact@v4
57
+ with:
58
+ name: python-distribution-files
59
+ path: dist/
60
+ retention-days: 1
61
+
62
+ test:
63
+ needs: prepare
64
+ strategy:
65
+ matrix:
66
+ python:
67
+ - "3.13" # minimum supported (see requires-python in pyproject.toml)
68
+ platform:
69
+ - ubuntu-latest
70
+ - macos-latest
71
+ - windows-latest
72
+ runs-on: ${{ matrix.platform }}
73
+ steps:
74
+ - uses: actions/checkout@v4
75
+ - uses: actions/setup-python@v5
76
+ id: setup-python
77
+ with:
78
+ python-version: ${{ matrix.python }}
79
+ - name: Retrieve pre-built distribution files
80
+ uses: actions/download-artifact@v4
81
+ with: {name: python-distribution-files, path: dist/}
82
+ - name: Run tests
83
+ run: >-
84
+ pipx run --python '${{ steps.setup-python.outputs.python-path }}'
85
+ tox --installpkg '${{ needs.prepare.outputs.wheel-distribution }}'
86
+ -- -rFEx --durations 10 --color yes # pytest args
87
+ - name: Generate coverage report
88
+ run: pipx run coverage lcov -o coverage.lcov
89
+ - name: Upload partial coverage report
90
+ continue-on-error: true # Coveralls not configured for this repo; don't fail CI on it
91
+ uses: coverallsapp/github-action@master
92
+ with:
93
+ path-to-lcov: coverage.lcov
94
+ github-token: ${{ secrets.GITHUB_TOKEN }}
95
+ flag-name: ${{ matrix.platform }} - py${{ matrix.python }}
96
+ parallel: true
97
+
98
+ finalize:
99
+ needs: test
100
+ runs-on: ubuntu-latest
101
+ steps:
102
+ - name: Finalize coverage report
103
+ continue-on-error: true # Coveralls not configured for this repo; don't fail CI on it
104
+ uses: coverallsapp/github-action@master
105
+ with:
106
+ github-token: ${{ secrets.GITHUB_TOKEN }}
107
+ parallel-finished: true
108
+
109
+ # Final version tags (e.g. v0.0.1) -> real PyPI. Pre-release tags (…rc…) are
110
+ # excluded here and routed to TestPyPI by the publish-testpypi job below.
111
+ publish:
112
+ needs: finalize
113
+ if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && !contains(github.ref, 'rc') }}
114
+ runs-on: ubuntu-latest
115
+ # Trusted Publishing (OIDC) — no API token/secret. The PyPI publisher is
116
+ # registered against this repo's `ci.yml` workflow and the `pypi` environment.
117
+ environment: pypi
118
+ permissions:
119
+ id-token: write # required for the OIDC token exchange with PyPI
120
+ contents: read
121
+ steps:
122
+ - name: Retrieve pre-built distribution files
123
+ uses: actions/download-artifact@v4
124
+ with: {name: python-distribution-files, path: dist/}
125
+ - name: Publish to PyPI (Trusted Publishing)
126
+ uses: pypa/gh-action-pypi-publish@release/v1
127
+
128
+ # Pre-release tags (e.g. v0.0.1rc1) -> TestPyPI dry-run. Same OIDC mechanism,
129
+ # registered against this repo's `ci.yml` workflow and the `testpypi` environment.
130
+ publish-testpypi:
131
+ needs: finalize
132
+ if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && contains(github.ref, 'rc') }}
133
+ runs-on: ubuntu-latest
134
+ environment: testpypi
135
+ permissions:
136
+ id-token: write
137
+ contents: read
138
+ steps:
139
+ - name: Retrieve pre-built distribution files
140
+ uses: actions/download-artifact@v4
141
+ with: {name: python-distribution-files, path: dist/}
142
+ - name: Publish to TestPyPI (Trusted Publishing, dry-run)
143
+ uses: pypa/gh-action-pypi-publish@release/v1
144
+ with:
145
+ repository-url: https://test.pypi.org/legacy/
@@ -0,0 +1,73 @@
1
+ # Real candidate CVs — contain PII (names, emails, phone numbers).
2
+ # NEVER commit these. Anonymised, generated fixtures live elsewhere.
3
+ tests/samples/
4
+
5
+ # Local preview output of the synthetic CV corpus (regenerable).
6
+ .fixture_preview/
7
+
8
+ # Generated example profiles (regenerable via examples/build_example_profile.py).
9
+ examples/output_profiles/
10
+
11
+ # Temporary and binary files
12
+ *~
13
+ *.py[cod]
14
+ *.so
15
+ *.cfg
16
+ !.isort.cfg
17
+ !setup.cfg
18
+ *.orig
19
+ *.log
20
+ *.pot
21
+ __pycache__/*
22
+ .cache/*
23
+ .*.swp
24
+ */.ipynb_checkpoints/*
25
+ .DS_Store
26
+
27
+ # Project files
28
+ .ropeproject
29
+ .project
30
+ .pydevproject
31
+ .settings
32
+ .idea
33
+ .vscode
34
+ tags
35
+
36
+ # Package files
37
+ *.egg
38
+ *.eggs/
39
+ .installed.cfg
40
+ *.egg-info
41
+
42
+ # Unittest and coverage
43
+ htmlcov/*
44
+ .coverage
45
+ .coverage.*
46
+ .tox
47
+ junit*.xml
48
+ coverage.xml
49
+ .pytest_cache/
50
+
51
+ # Build and docs folder/files
52
+ build/*
53
+ dist/*
54
+ sdist/*
55
+ docs/api/*
56
+ docs/_rst/*
57
+ docs/_build/*
58
+ cover/*
59
+ MANIFEST
60
+
61
+ # Per-project virtualenvs
62
+ .venv*/
63
+ .conda*/
64
+ .python-version
65
+ # Generated by setuptools_scm
66
+ src/profgen/_version.py
67
+
68
+ # Local secrets / API keys — never commit
69
+ .env
70
+ .env.*
71
+
72
+ # Private branded assets for `make profile` (confidential templates + style maps)
73
+ local/
@@ -0,0 +1,29 @@
1
+ ci:
2
+ autoupdate_schedule: quarterly
3
+
4
+ default_language_version:
5
+ python: python3.13
6
+
7
+ repos:
8
+ - repo: https://github.com/astral-sh/ruff-pre-commit
9
+ rev: v0.6.9 # pin to a tag; update as needed
10
+ hooks:
11
+ - id: ruff
12
+ args: [--fix, --exit-non-zero-on-fix]
13
+ - id: ruff-format
14
+
15
+ - repo: https://github.com/pre-commit/pre-commit-hooks
16
+ rev: v4.6.0 # pin to a tag; update as needed
17
+ hooks:
18
+ - id: check-added-large-files
19
+ - id: end-of-file-fixer
20
+ - id: trailing-whitespace
21
+ - id: check-merge-conflict
22
+
23
+
24
+ # If you want Black as well, uncomment this block and keep its rev pinned
25
+ # - repo: https://github.com/psf/black
26
+ # rev: 24.10.0 # example pinned release
27
+ # hooks:
28
+ # - id: black
29
+ # args: ["--line-length=100"]
@@ -0,0 +1,24 @@
1
+ # Read the Docs configuration file
2
+ # https://docs.readthedocs.io/en/stable/config-file/v2.html
3
+
4
+ version: 2
5
+
6
+ build:
7
+ os: ubuntu-22.04
8
+ tools:
9
+ python: "3.13"
10
+
11
+ sphinx:
12
+ configuration: docs/conf.py
13
+
14
+ formats:
15
+ - pdf
16
+
17
+ python:
18
+ install:
19
+ # Install the package with its `docs` extra (single source of truth in
20
+ # pyproject.toml). No docs/requirements.txt.
21
+ - method: pip
22
+ path: .
23
+ extras:
24
+ - docs
@@ -0,0 +1,3 @@
1
+ # Contributors
2
+
3
+ - Kevin Steptoe <kevin.steptoe@gmail.com>
@@ -0,0 +1,170 @@
1
+ # Changelog
2
+
3
+ ## Unreleased
4
+
5
+ ### De-brand & bring-your-own-template
6
+
7
+ - Remove all brand-specific naming from the user-facing surface ahead of the
8
+ public PyPI release: the output is now described as a generic "standardised
9
+ Word profile". The pipeline, the no-invented-facts rule, employer anonymisation
10
+ (`Project N | <domain>`) and the grounding check are unchanged.
11
+ - Render against **five logical roles** (`title`, `date_heading`, `body`,
12
+ `bullet`, `legal`) resolved to paragraph-style names through a *style map*. The
13
+ neutral `DEFAULT_STYLE_MAP` maps them to `Profile Title`, `Profile Date`,
14
+ `Normal`, `List Bullet` and `Profile Legal`.
15
+ - **Bring your own template:** `convert --template my.docx --style-map my.toml`
16
+ renders against your branded document, with the TOML file mapping the five roles
17
+ to that document's own style names. `load_style_map` loads it; partial maps fall
18
+ back to the defaults. This lets a private or corporate template be applied
19
+ without it living in the package.
20
+ - `convert`'s default output is now `<source-stem>_profile.docx`.
21
+ - Add a `make profile CV=cv.pdf [OUT=..] [OFFLINE=1]` convenience target that
22
+ renders a branded profile using a gitignored `local/template.docx` and
23
+ `local/style-map.toml`; copy `examples/style-map.example.toml` to
24
+ `local/style-map.toml` and edit it. `local/` and `.env` are gitignored, keeping
25
+ confidential templates and API keys out of the repository.
26
+
27
+ ### Increment 0 — Tooling & dependencies foundation
28
+
29
+ - Add runtime dependencies: `pydantic`, `python-docx`, `pdfplumber`, `anthropic`.
30
+ - Add optional `pdf-fast` extra (`pymupdf`) for the swappable PDF backend.
31
+ - Add `reportlab` and `mypy` to the `dev` extra; fold `docs` + `pdf-fast` in.
32
+ - Wire `mypy --strict` (scoped to `src/`) into the tooling.
33
+ - Replace the single-command CLI with a Click group exposing `convert` and
34
+ `make-template` (stubs at this stage); add the `cv-formatter` entry-point alias.
35
+ - Remove the scaffold's placeholder `api.py` (superseded by the forthcoming
36
+ `pipeline.py`).
37
+ - Add an offline guard (socket-blocking autouse fixture) and CLI smoke tests;
38
+ enforce `mypy --strict` in CI via a `typecheck` tox env.
39
+ - Add a synthetic, fully-fictitious CV fixture corpus (`tests/fixtures/`) with a
40
+ `cv_corpus` test fixture; record the profile-format decisions in the spec.
41
+
42
+ ### Increment 1 — The Candidate data contract
43
+
44
+ - Add the Pydantic v2 `Candidate` model and `EmploymentEntry` / `ProjectEntry` /
45
+ `EducationEntry` sub-models — the single contract shared by the whole pipeline.
46
+ Every field defaults to `"Not stated"` / `[]`; nothing is required.
47
+ - Export `Candidate` (and sub-models, `NOT_STATED`, `is_not_stated`) from the
48
+ package root; enable the `pydantic.mypy` plugin.
49
+
50
+ ### Increment 2 — Extractors (stage 2, fully offline)
51
+
52
+ - Add `profgen.extractors`: `ExtractedDocument` + `Extractor` protocol, plain-text
53
+ (UTF-8 with CP1252 fallback), `.docx` (python-docx, paragraphs + tables in
54
+ document order) and `.pdf` (pdfplumber default, pymupdf swappable via the
55
+ `pdf-fast` extra) backends, with extension-based `extract()` dispatch.
56
+ - Extractors do no interpretation — verbatim text only; `normalized_text`
57
+ collapses whitespace for grounding-friendly matching.
58
+
59
+ ### Increment 3 — LLM structuring layer (stage 3)
60
+
61
+ - Add `profgen.llm`: the `StructuringClient` protocol — the single interface that
62
+ turns an `ExtractedDocument` into a typed `Candidate` — with two implementations
63
+ behind it.
64
+ - Add `ClaudeStructuringClient`, the production client. It calls Claude
65
+ (`claude-sonnet-4-6`) with forced, strict tool use against one tool whose schema
66
+ is the `Candidate` model, `temperature=0` and `max_tokens=8192`. Constructing it
67
+ touches no network or environment; the Anthropic SDK client is built lazily on
68
+ first use (or injected for testing). It is wired but, by design, never exercised
69
+ in CI.
70
+ - Add `HeuristicStructuringClient`, a deterministic, network-free parser that reads
71
+ the synthetic corpus by section headers. It is the `--offline` and test path and
72
+ invents nothing — absent fields stay at the `"Not stated"` / `[]` defaults.
73
+ - Add `StructuringError` for transport, API and malformed-response failures, and
74
+ the `SYSTEM_PROMPT` / `TOOL_NAME` prompt constants. The system prompt encodes the
75
+ no-invented-facts rule, the `"Not stated"` sentinel, the no-derived-fields rule,
76
+ British English, and the semiconductor skill bucketing.
77
+
78
+ ### Increment 4 — Grounding check & review report (stages 4 & 6)
79
+
80
+ - Add `profgen.pipeline` with four pure, deterministic, LLM-independent functions,
81
+ all re-exported from the package root.
82
+ - `check_grounding` — the anti-hallucination guard. For each groundable entity
83
+ (company, tool, certification, institution, project name, project domain) it
84
+ confirms the value appears in the verbatim stage-2 text via a
85
+ whitespace-collapsed, case-folded substring match, and returns a British-English
86
+ note for every entity that does not. Honestly-absent (`"Not stated"` / `[]`)
87
+ values are never flagged; company is verified for grounding even though it is
88
+ never rendered (SPEC §14.1).
89
+ - `collect_missing_information` — lists every scalar left at the `"Not stated"`
90
+ sentinel and every empty list, in model field order.
91
+ - `annotate_candidate` — the composition seam: runs both of the above and returns
92
+ a copy of the `Candidate` with the pipeline-populated `source_confidence_notes`
93
+ and `missing_information` fields set, without mutating the input.
94
+ - `render_review_report` — renders the deterministic `*.review.md` body (missing
95
+ information / assumptions made / items to verify before customer submission) from
96
+ an already-annotated candidate, in British English. (End-to-end `run_pipeline`
97
+ orchestration remains a later increment.)
98
+
99
+ ### Increment 5 — Word rendering & starter template (stage 5)
100
+
101
+ - Add `profgen.template.word_renderer`: the deterministic, fully-offline Word
102
+ renderer. `render_profile(candidate, *, template_path=None, style_map=None)`
103
+ builds the `python-docx` `Document`; `write_profile(candidate, output_path, *,
104
+ template_path=None, style_map=None)` renders and saves it.
105
+ - Use the **style-donor** approach (SPEC §8): keep the donor's header, footer and
106
+ fonts, and write variable-length content programmatically against five logical
107
+ roles (`title`, `date_heading`, `body`, `bullet`, `legal`, exposed as the
108
+ `ROLE_TITLE/ROLE_DATE/ROLE_BODY/ROLE_BULLET/ROLE_LEGAL` constants) — no
109
+ Jinja-style loops in the `.docx`. A *style map* resolves each role to a concrete
110
+ paragraph-style name; the neutral `DEFAULT_STYLE_MAP` maps them to
111
+ `Profile Title`, `Profile Date`, `Normal`, `List Bullet` and `Profile Legal`. A
112
+ house style is applied by supplying a donor plus a style map that points the
113
+ roles at that document's own style names; an absent style falls back to the
114
+ default.
115
+ - Add `make_template(path)`, which writes a starter `.docx` carrying the default
116
+ named styles, and wire the `make-template` CLI command to it (prints
117
+ `Wrote starter template to <path>`).
118
+ - Enforce **employer anonymisation** (§14.1): company names are never rendered;
119
+ experience is shown as `Project N | <domain>`. Enforce **no derived fields**
120
+ (§14.3): the skills table's "Years Experience" column renders `"Not stated"`
121
+ for every row. Honestly render `"Not stated"` where the source was silent.
122
+ - All rendered text is British English. (`convert` orchestration remains a stub
123
+ until Increment 6.)
124
+
125
+ ### Increment 6 — End-to-end `convert` orchestration (offline path)
126
+
127
+ - Add `profgen.run_pipeline(source, output, *, offline=False, template_path=None,
128
+ style_map=None, client=None) -> PipelineResult` — the full orchestration
129
+ (SPEC §3): extract →
130
+ structure → annotate/ground → render `.docx` → write the sibling `*.review.md`.
131
+ An injected `client` wins; otherwise `--offline` selects the deterministic
132
+ `HeuristicStructuringClient` and the default path selects `ClaudeStructuringClient`
133
+ (`ANTHROPIC_API_KEY` required, reached only on first `structure`). The offline /
134
+ injected path is fully deterministic and performs no network access.
135
+ - Add the frozen `PipelineResult` dataclass (`candidate`, `profile_path`,
136
+ `review_path`, `needs_verification`); both re-exported from the package root.
137
+ `needs_verification` is `True` when any entity was ungrounded or any field was
138
+ left unstated.
139
+ - Wire the `convert` CLI command to `run_pipeline`. It writes the profile to
140
+ `--output` (default `<source-stem>_profile.docx` in the CWD) plus the sibling
141
+ `*.review.md` (`out.docx` → `out.review.md`), and prints a manual-verification
142
+ warning to stderr when `needs_verification` is set. Missing `ANTHROPIC_API_KEY`
143
+ or an online failure is reported as a friendly message advising `--offline`.
144
+
145
+ ### Increment 7 — Examples, real-path smoke, README & acceptance hardening
146
+
147
+ - Add `examples/build_example_profile.py` — the runnable offline example (SPEC §12
148
+ criterion 4). It drives `run_pipeline` on the bundled
149
+ `examples/input_cvs/sample_cv.txt` with `offline=True`, needs **no API key and
150
+ makes no network call**, and writes `sample_profile.docx` plus its sibling
151
+ `sample_profile.review.md` into the gitignored `examples/output_profiles/`. The
152
+ run is deterministic.
153
+ - Add `examples/smoke_real_path.py` — the only place that exercises the real Claude
154
+ path (`offline=False`), double-guarded behind `PROFGEN_REAL_SMOKE=1` **and**
155
+ `ANTHROPIC_API_KEY`. When either is absent it prints a one-line message and exits
156
+ `0` without touching the network, so it is inert in the offline suite and **never
157
+ runs in CI** by design.
158
+ - Rewrite the placeholder PyScaffold `README.md` into a real description: install,
159
+ the `convert` / `make-template` usage (incl. `--offline` and the `cv-formatter`
160
+ alias), and the no-invented-facts guarantee.
161
+ - Add the Sphinx User Guide and finalise the docs sweep; confirm the API reference
162
+ (autodoc) still builds cleanly.
163
+ - Final acceptance sweep against SPEC §12: all eight criteria hold; `ruff`,
164
+ `mypy --strict` (scoped to `src/`) and offline `pytest` are green. The SPEC §13
165
+ open seams (OCR for scanned PDFs; a fixed-layout `docxtpl` placeholder renderer)
166
+ are deliberately left unbuilt.
167
+
168
+ <!-- No final release has been tagged yet; only release candidates (e.g. 0.0.1rc1)
169
+ exist so far. Between tags, setuptools_scm derives a development version from git
170
+ (0.0.x.devN). The first final release will be tagged after the release candidates. -->
@@ -0,0 +1,31 @@
1
+ # Contributing
2
+
3
+ Contributions are welcome! Please follow these guidelines:
4
+
5
+ ## Development Setup
6
+
7
+ 1. Clone the repository
8
+ 2. Install development dependencies: `make dev`
9
+ 3. Install pre-commit hooks: `pre-commit install`
10
+
11
+ ## Code Style
12
+
13
+ This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
14
+ Run `make lint` to check and `make format` to auto-format.
15
+
16
+ ## Testing
17
+
18
+ Run tests with `make test` or `pytest` directly.
19
+
20
+ ## Pull Requests
21
+
22
+ 1. Fork the repository
23
+ 2. Create a feature branch
24
+ 3. Make your changes
25
+ 4. Run tests and linting
26
+ 5. Submit a pull request
27
+
28
+ ## Issue Reporting
29
+
30
+ Please use the issue tracker to report bugs or request features.
31
+ Include as much detail as possible.
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2026 Kevin Steptoe
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,9 @@
1
+ # Keep confidential, repo-private files out of the published sdist.
2
+ # setuptools_scm includes every git-tracked file by default, so the private
3
+ # design docs and agent definitions are explicitly excluded here.
4
+ exclude cv_formatter_SPEC.md
5
+ exclude cv_formatter_IMPLEMENTATION_PLAN.md
6
+ exclude cv_formatter_ROADMAP.md
7
+ exclude CLAUDE.md
8
+ prune .claude
9
+ prune tests/samples