priveil 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. priveil-0.1.0/.cursor/skills/conduit-core.md +54 -0
  2. priveil-0.1.0/.cursor/skills/conduit-py.md +50 -0
  3. priveil-0.1.0/.cursor/skills/domain-doc.md +70 -0
  4. priveil-0.1.0/.cursor/skills/ship-and-watch.md +86 -0
  5. priveil-0.1.0/.cursor/skills/vertical-slices.md +65 -0
  6. priveil-0.1.0/.dockerignore +17 -0
  7. priveil-0.1.0/.env.example +43 -0
  8. priveil-0.1.0/.github/dependabot.yml +18 -0
  9. priveil-0.1.0/.github/workflows/ci.yml +51 -0
  10. priveil-0.1.0/.github/workflows/publish.yml +35 -0
  11. priveil-0.1.0/.gitignore +15 -0
  12. priveil-0.1.0/.python-version +1 -0
  13. priveil-0.1.0/Dockerfile +38 -0
  14. priveil-0.1.0/Makefile +35 -0
  15. priveil-0.1.0/PKG-INFO +14 -0
  16. priveil-0.1.0/README.md +258 -0
  17. priveil-0.1.0/bruno/priveil-api/01-health.bru +15 -0
  18. priveil-0.1.0/bruno/priveil-api/02-detect-accurate.bru +24 -0
  19. priveil-0.1.0/bruno/priveil-api/03-detect-fast.bru +24 -0
  20. priveil-0.1.0/bruno/priveil-api/04-detect-empty-text-422.bru +23 -0
  21. priveil-0.1.0/bruno/priveil-api/05-detect-unsupported-language-422.bru +23 -0
  22. priveil-0.1.0/bruno/priveil-api/06-pseudonymise-accurate.bru +23 -0
  23. priveil-0.1.0/bruno/priveil-api/07-pseudonymise-fast.bru +23 -0
  24. priveil-0.1.0/bruno/priveil-api/08-pseudonymise-with-detections.bru +37 -0
  25. priveil-0.1.0/bruno/priveil-api/09-pseudonymise-operator-overrides.bru +27 -0
  26. priveil-0.1.0/bruno/priveil-api/10-assess-auto-detect.bru +23 -0
  27. priveil-0.1.0/bruno/priveil-api/11-assess-with-detections.bru +37 -0
  28. priveil-0.1.0/bruno/priveil-api/12-assess-low-risk.bru +23 -0
  29. priveil-0.1.0/bruno/priveil-api/13-assess-not-configured-503.bru +36 -0
  30. priveil-0.1.0/bruno/priveil-api/14-assess-empty-text-422.bru +22 -0
  31. priveil-0.1.0/bruno/priveil-api/bruno.json +5 -0
  32. priveil-0.1.0/bruno/priveil-api/environments/local.bru +3 -0
  33. priveil-0.1.0/docker-compose.yml +15 -0
  34. priveil-0.1.0/docs/domain/priveil.md +225 -0
  35. priveil-0.1.0/pyproject.toml +66 -0
  36. priveil-0.1.0/src/priveil/__init__.py +0 -0
  37. priveil-0.1.0/src/priveil/api/__init__.py +0 -0
  38. priveil-0.1.0/src/priveil/api/deps.py +43 -0
  39. priveil-0.1.0/src/priveil/api/routes/__init__.py +0 -0
  40. priveil-0.1.0/src/priveil/api/routes/assess.py +28 -0
  41. priveil-0.1.0/src/priveil/api/routes/detect.py +27 -0
  42. priveil-0.1.0/src/priveil/api/routes/health.py +14 -0
  43. priveil-0.1.0/src/priveil/api/routes/pseudonymise.py +37 -0
  44. priveil-0.1.0/src/priveil/app.py +75 -0
  45. priveil-0.1.0/src/priveil/domain/__init__.py +0 -0
  46. priveil-0.1.0/src/priveil/domain/assessment.py +44 -0
  47. priveil-0.1.0/src/priveil/domain/detection.py +45 -0
  48. priveil-0.1.0/src/priveil/domain/entities.py +69 -0
  49. priveil-0.1.0/src/priveil/domain/judgement.py +23 -0
  50. priveil-0.1.0/src/priveil/domain/pseudonymisation.py +46 -0
  51. priveil-0.1.0/src/priveil/engine/__init__.py +0 -0
  52. priveil-0.1.0/src/priveil/engine/analyser.py +99 -0
  53. priveil-0.1.0/src/priveil/engine/pseudonymiser.py +145 -0
  54. priveil-0.1.0/src/priveil/judge/__init__.py +0 -0
  55. priveil-0.1.0/src/priveil/judge/assessor.py +122 -0
  56. priveil-0.1.0/src/priveil/judge/model.py +82 -0
  57. priveil-0.1.0/src/priveil/judge/prompts/assessor.md +34 -0
  58. priveil-0.1.0/src/priveil/judge/prompts/refiner.md +19 -0
  59. priveil-0.1.0/src/priveil/judge/refiner.py +144 -0
  60. priveil-0.1.0/src/priveil/recognisers/__init__.py +0 -0
  61. priveil-0.1.0/src/priveil/recognisers/au_abn.py +46 -0
  62. priveil-0.1.0/src/priveil/recognisers/au_acn.py +45 -0
  63. priveil-0.1.0/src/priveil/recognisers/au_bsb.py +21 -0
  64. priveil-0.1.0/src/priveil/recognisers/au_medicare.py +45 -0
  65. priveil-0.1.0/src/priveil/recognisers/au_phone.py +30 -0
  66. priveil-0.1.0/src/priveil/recognisers/au_tfn.py +48 -0
  67. priveil-0.1.0/src/priveil/recognisers/registry.py +27 -0
  68. priveil-0.1.0/src/priveil/settings.py +30 -0
  69. priveil-0.1.0/tests/__init__.py +0 -0
  70. priveil-0.1.0/tests/conftest.py +119 -0
  71. priveil-0.1.0/tests/integration/__init__.py +0 -0
  72. priveil-0.1.0/tests/integration/test_assess.py +86 -0
  73. priveil-0.1.0/tests/integration/test_au_detect.py +79 -0
  74. priveil-0.1.0/tests/integration/test_detect.py +64 -0
  75. priveil-0.1.0/tests/integration/test_health.py +12 -0
  76. priveil-0.1.0/tests/integration/test_pseudonymise.py +77 -0
  77. priveil-0.1.0/tests/integration/test_refine.py +65 -0
  78. priveil-0.1.0/tests/unit/__init__.py +0 -0
  79. priveil-0.1.0/tests/unit/test_analyser.py +79 -0
  80. priveil-0.1.0/tests/unit/test_assessor.py +89 -0
  81. priveil-0.1.0/tests/unit/test_entities.py +41 -0
  82. priveil-0.1.0/tests/unit/test_judge_model.py +93 -0
  83. priveil-0.1.0/tests/unit/test_pseudonymiser.py +186 -0
  84. priveil-0.1.0/tests/unit/test_recognisers.py +146 -0
  85. priveil-0.1.0/tests/unit/test_refiner.py +130 -0
  86. priveil-0.1.0/uv.lock +3833 -0
@@ -0,0 +1,54 @@
1
+ ---
2
+ name: conduit-core
3
+ description: >
4
+ Forces the laziest clean pipeline that actually works. Question whether the
5
+ transform needs to exist at all, reach for standard and internal libraries
6
+ before writing anything new, validate at every trust boundary, write pure
7
+ functions that compose, document contracts, and treat security as
8
+ load-bearing. This is the language-agnostic core. Pair it with conduit-py
9
+ for concrete tooling.
10
+ argument-hint: "[lite|full|ultra]"
11
+ ---
12
+
13
+ # Conduit — Core
14
+
15
+ Lazy means efficient, not careless. The best code is the code never written.
16
+ Data flows in from hostile territory, gets validated, passes through the
17
+ minimum pure transforms required, and exits clean. Security is load-bearing,
18
+ not decorative. Types are documentation. The schema is the gate.
19
+
20
+ ## The Ladder
21
+
22
+ Stop at the first rung that holds:
23
+
24
+ 1. **Does this need to exist at all?** Speculative transform = skip it. (YAGNI)
25
+ 2. **Does an internal library already do it?** Use it.
26
+ 3. **Does the standard library do it?** Reach for it before any custom logic.
27
+ 4. **Does this data cross a serialization boundary?** Give it a schema.
28
+ 5. **Is this a transformation?** Pure function. Input → output, no side effects.
29
+ 6. **Can it be a pipeline?** Compose. One function per concern.
30
+ 7. **Does it cross a trust boundary?** Validate in, sanitize out, log the action (never the secret).
31
+ 8. **Is the contract documented?** Docstring — one-line summary, args, return, errors.
32
+ 9. **Only then:** write the minimum implementation that works.
33
+
34
+ ## Rules
35
+
36
+ **Laziness**
37
+ - No unrequested abstractions.
38
+ - Deletion over addition. Shortest working diff wins.
39
+ - `conduit:` comments mark deliberate simplifications — name the ceiling and the upgrade path.
40
+
41
+ **Security Champion**
42
+ - All external data is hostile until a parsed schema says otherwise.
43
+ - No secrets in logs, no secrets in code. Secrets come from the environment.
44
+ - Parameterised queries only — never build SQL or shell commands by string interpolation.
45
+ - Least privilege: functions receive only the data they need.
46
+ - On bad input: reject loudly with a typed error, never silently coerce.
47
+
48
+ ## Intensity
49
+
50
+ | Level | What changes |
51
+ |-------|-------------|
52
+ | **lite** | Build what's asked with type hints and a docstring. |
53
+ | **full** | Ladder enforced. YAGNI first, schemas at boundaries, pure transforms, security at every entry point. Default. |
54
+ | **ultra** | YAGNI extremist. Delete before adding. Challenge the requirement before writing a line. |
@@ -0,0 +1,50 @@
1
+ ---
2
+ name: conduit-py
3
+ description: >
4
+ Python tooling for the conduit discipline — Pydantic at every serialization
5
+ boundary, pure composable functions, stdlib-first functional style,
6
+ Google-style docstrings, and a pytest + hypothesis test stack.
7
+ Read conduit-core for the ladder, philosophy, and security principles.
8
+ argument-hint: "[lite|full|ultra]"
9
+ ---
10
+
11
+ > Read `conduit-core` before this file. This skill adds Python-specific tooling.
12
+
13
+ # Conduit — Python
14
+
15
+ ## Data & Types
16
+
17
+ - Pydantic models at every serialization boundary: API inputs, env vars (`BaseSettings`), external service responses.
18
+ - Never reach into deserialized JSON with `.get()` chains when a model exists. Use `Model.model_validate(data)`.
19
+ - Type hints on every function signature, return type included.
20
+ - Immutable by default: frozen Pydantic models, tuples over lists where mutation adds nothing.
21
+ - Never use `Any` without `# conduit: Any here because [reason]`.
22
+
23
+ ## Functional Style
24
+
25
+ - Pure functions are the default. Same input → same output, always.
26
+ - Small, composable units. One responsibility each.
27
+ - Generator pipelines for large data: never load what you can stream.
28
+ - `functools` first: `partial`, `reduce`, `lru_cache`.
29
+ - No mutable default arguments. Ever.
30
+
31
+ ## Security Champion (Python surface)
32
+
33
+ - No secrets in logs, no secrets in code. Env vars via `BaseSettings`.
34
+ - Secret variables wrapped in Pydantic's `SecretStr` or `Secret[T]`.
35
+ - Parameterised queries only. No f-strings into SQL or shell commands.
36
+ - On bad input: reject loudly with a clear `ValueError` or `ValidationError`, never silently coerce.
37
+
38
+ ## Documentation
39
+
40
+ - Google-style docstrings on every non-trivial function.
41
+ - One-line imperative summary (`Validate and parse...`, `Transform...`).
42
+ - Args / Returns / Raises — one short line each, only what isn't obvious from the type.
43
+ - Never document what the type signature already says.
44
+
45
+ ## Test stack
46
+
47
+ - `pytest` for all test running. No `unittest`.
48
+ - `hypothesis` for data edge cases: null values, empty collections, out-of-range values, schema surprises.
49
+ - `asyncio_mode = "auto"` in pytest config; use `async def test_` directly.
50
+ - No mocks of internal logic — test real behaviour with real (small) data.
@@ -0,0 +1,70 @@
1
+ # Domain Doc
2
+
3
+ You are a Domain-Driven Design facilitator helping a data engineering or data platform team build out their domain documentation. Your goal is to extract tacit knowledge from the team and turn it into structured, durable documentation that raises the context of every squad member — present and future.
4
+
5
+ ## When to use this skill
6
+
7
+ - A new domain or subdomain is being defined
8
+ - A team is starting a new service, pipeline, or platform capability
9
+ - An existing area lacks documentation and context is siloed in people's heads
10
+ - Onboarding new engineers who need to understand the domain fast
11
+
12
+ ## How to run a domain doc session
13
+
14
+ ### Step 1 — Establish scope
15
+
16
+ Ask the user: "What domain or subdomain are we documenting? Give me one sentence on what it does and who it serves."
17
+
18
+ Wait for their answer before proceeding.
19
+
20
+ ### Step 2 — Extract the ubiquitous language
21
+
22
+ Work through these questions one at a time (don't dump them all at once):
23
+
24
+ 1. "What are the core nouns in this domain — the things you work with every day? List them out, don't worry about definitions yet."
25
+ 2. For each noun: "How would you define [term] to someone joining the team tomorrow? Be precise — what is it, what isn't it, and does it mean something different here than in common usage?"
26
+ 3. "Are there any terms that sound similar but mean different things in this context? Or terms outsiders use differently to how you use them?"
27
+
28
+ Document each term as you go in this format:
29
+
30
+ ```
31
+ **[Term]**
32
+ Definition: [precise definition]
33
+ Alias / external term: [if different outside this team]
34
+ Not to be confused with: [if there's a common mix-up]
35
+ ```
36
+
37
+ ### Step 3 — Map bounded contexts
38
+
39
+ Ask:
40
+ - "Does this domain have clear subdomains — areas that could almost stand alone? What are they?"
41
+ - "Where are the edges? What does this domain own vs. depend on from elsewhere?"
42
+ - "What are the integration points — where does data or control flow in or out?"
43
+
44
+ ### Step 4 — Capture domain events
45
+
46
+ Ask:
47
+ - "What are the key things that *happen* in this domain? Think in past tense — 'DatasetPublished', 'PipelineRunCompleted', 'AccessRequestApproved'."
48
+ - "Which of these events trigger something else downstream?"
49
+
50
+ ### Step 5 — Decisions and constraints
51
+
52
+ Document each as an Architecture Decision Record (ADR) stub:
53
+
54
+ ```
55
+ **Decision: [title]**
56
+ Context: [why this came up]
57
+ Decision: [what was decided]
58
+ Consequences: [what this means for the domain]
59
+ Constraints: [privacy/security/compliance if relevant]
60
+ ```
61
+
62
+ ### Step 6 — Goals and success
63
+
64
+ Ask:
65
+ - "What does 'good' look like for this domain? What are you optimising for?"
66
+ - "How do you know when this domain is working well vs. struggling?"
67
+
68
+ ### Step 7 — Produce the output
69
+
70
+ Assemble everything into a structured markdown document saved at `docs/domain/[domain-name].md`.
@@ -0,0 +1,86 @@
1
+ ---
2
+ name: ship-and-watch
3
+ description: >
4
+ Opens a pull request with the GitHub CLI, then watches it to completion.
5
+ Polls CI checks, review approval, and review-thread resolution on a loop
6
+ until every gate is green. Gates on unresolved review threads — not just
7
+ CHANGES_REQUESTED. Merges only when all three gates are green, then writes
8
+ a summary. Use when asked to "ship and watch", "ship it", "raise the PR and
9
+ merge when green", or to create, monitor, and merge a pull request.
10
+ argument-hint: "[base-branch]"
11
+ ---
12
+
13
+ # Ship and Watch
14
+
15
+ A PR is mergeable only when **all three gates** are satisfied:
16
+
17
+ 1. **Checks** — every required CI run has passed.
18
+ 2. **Approval** — at least one approving review.
19
+ 3. **Comments resolved** — every review thread is resolved (use GraphQL, not `reviewDecision`).
20
+
21
+ ## Step 0 — Pre-flight
22
+
23
+ ```bash
24
+ gh auth status
25
+ git rev-parse --abbrev-ref HEAD # must be a feature branch, not base
26
+ git status --porcelain # must be clean
27
+ ```
28
+
29
+ Push if needed: `git push -u origin HEAD`
30
+
31
+ ## Step 1 — Open the PR
32
+
33
+ ```bash
34
+ git log --oneline "$(gh repo view --json defaultBranchRef -q .defaultBranchRef.name)..HEAD"
35
+ gh pr create --base "<base>" --head "$(git rev-parse --abbrev-ref HEAD)" \
36
+ --title "<imperative title>" --body "<what changed and why>"
37
+ PR=$(gh pr view --json number -q .number)
38
+ OPEN_SHA=$(git rev-parse HEAD)
39
+ ```
40
+
41
+ ## Step 2 — Watch loop (poll all three gates each round)
42
+
43
+ **Gate A — Checks:** `gh pr checks "$PR"` — exit 0 = pass, exit 8 = pending, exit 1 = STOP.
44
+
45
+ **Gate B — Approval:** read `reviewDecision` from `gh pr view "$PR" --json reviewDecision`.
46
+ - `APPROVED` → green. `null` → ask user before merging. `REVIEW_REQUIRED` / `CHANGES_REQUESTED` → keep polling.
47
+
48
+ **Gate C — Unresolved threads (GraphQL):**
49
+ ```bash
50
+ gh api graphql -F owner='<owner>' -F repo='<repo>' -F pr="$PR" -f query='
51
+ query($owner:String!, $repo:String!, $pr:Int!) {
52
+ repository(owner:$owner, name:$repo) {
53
+ pullRequest(number:$pr) {
54
+ reviewThreads(first:100) {
55
+ nodes { isResolved isOutdated path comments(first:1) { nodes { author { login } body } } }
56
+ }
57
+ }
58
+ }
59
+ }' | jq '[.data.repository.pullRequest.reviewThreads.nodes[] | select(.isResolved==false)] | length'
60
+ ```
61
+
62
+ Merge only when count == 0. Never assume `isOutdated == true` means resolved — confirm the fix, then resolve.
63
+
64
+ ## Step 3 — Merge
65
+
66
+ When Gates A and C are green and `mergeable != CONFLICTING`, act on Gate B's `reviewDecision`:
67
+ - `APPROVED` → `gh pr merge "$PR" --squash --delete-branch`
68
+ - `null` → prompt user first; wait for explicit yes.
69
+
70
+ ## Step 4 — Ship report
71
+
72
+ ```bash
73
+ git log --oneline "$OPEN_SHA"..HEAD
74
+ git diff --stat "$OPEN_SHA"..HEAD
75
+ gh pr view "$PR" --json reviews,comments
76
+ gh api "repos/<owner>/<repo>/pulls/$PR/comments"
77
+ ```
78
+
79
+ Report: what changed after the PR opened, each piece of feedback and its resolution, final gate status.
80
+
81
+ ## Hard stops
82
+
83
+ - Gate A exit 1 (check failed).
84
+ - Gate C count > 0 (unresolved threads).
85
+ - `mergeable == CONFLICTING`.
86
+ - 20 polling rounds elapsed — report and hand back.
@@ -0,0 +1,65 @@
1
+ ---
2
+ name: vertical-slices
3
+ description: >
4
+ Guidance for breaking features into tracer bullet vertical slices. Use when
5
+ designing new features, epics, or initiative plans for this service. Each
6
+ slice cuts through ALL integration layers end-to-end.
7
+ ---
8
+
9
+ # Vertical Slices
10
+
11
+ A vertical slice is a thin end-to-end cut through every layer of the system —
12
+ schema, engine, API, tests. It is NOT a horizontal layer (e.g. "add all domain
13
+ models first").
14
+
15
+ ## Rules
16
+
17
+ - **A completed slice is demo-able or verifiable on its own** — no sibling slice needed.
18
+ - **Each slice delivers a narrow but COMPLETE path** — no half-implemented schemas, no skipped tests.
19
+ - **Prefactoring ships first** as Slice 0: scaffold, shared models, engine wiring.
20
+ - **A new slice NEVER modifies a prior slice's public contract** — extend, don't break.
21
+ - **LLM / AI paths are always additive** — core deterministic paths never depend on them.
22
+
23
+ ## Shape of a good slice
24
+
25
+ | Layer | Must include |
26
+ |--------|-------------------------------------------------|
27
+ | Schema | Pydantic request + response models |
28
+ | Engine | Business logic / service layer |
29
+ | API | FastAPI route wired end-to-end |
30
+ | Tests | ≥1 unit test + ≥1 integration test via ASGI client |
31
+
32
+ ## Anti-patterns
33
+
34
+ - "Add all the domain models" — horizontal slice, not vertical.
35
+ - "Wire up the engine layer" — same problem.
36
+ - A slice that cannot be verified without a later slice being complete.
37
+ - An API route without a test.
38
+
39
+ ## Process
40
+
41
+ 1. Identify the narrowest path that delivers value.
42
+ 2. Name it as an imperative user capability: "Detect entities in text".
43
+ 3. List the layers it touches (schema → engine → API → test).
44
+ 4. Write the acceptance criteria as observable outputs, not internal state.
45
+ 5. Implement each layer in order; run the test before calling the slice done.
46
+
47
+ ## Build order for this service
48
+
49
+ ```
50
+ Slice 0: Scaffold (prerequisite for everything)
51
+
52
+ Slice 1: Text entity detection → POST /detect
53
+
54
+ Slice 2: AU financial recognisers (extends Slice 1 entities)
55
+
56
+ Slice 3: Anonymisation → POST /anonymise
57
+
58
+ Slice 5: LLM judge (non-streaming) → POST /judge
59
+
60
+ Slice 6: Streaming judge → POST /judge/stream
61
+
62
+ (Slice 4 — image detection — deferred; see backlog)
63
+ ```
64
+
65
+ Each arrow = hard dependency. Slices at the same level run in parallel.
@@ -0,0 +1,17 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .git/
6
+ .github/
7
+ .mypy_cache/
8
+ .ruff_cache/
9
+ .pytest_cache/
10
+ .hypothesis/
11
+ dist/
12
+ build/
13
+ htmlcov/
14
+ *.coverage
15
+ .env
16
+ .env.*
17
+ .cursor/
@@ -0,0 +1,43 @@
1
+ # Copy this file to .env and fill in real values.
2
+ # All variables are prefixed with PRIVEIL_ except provider API keys.
3
+
4
+ # ── LLM judge (optional) ─────────────────────────────────────────────────────
5
+ # Enables two things:
6
+ # 1. Judge mode on POST /detect and POST /pseudonymise (mode='judge', default)
7
+ # 2. POST /assess — content risk and sensitivity assessment
8
+ #
9
+ # When unset: mode='judge' degrades silently to mode='fast'; /assess returns 503.
10
+ #
11
+ # Built-in providers — use provider:model format:
12
+ # PRIVEIL_JUDGE_MODEL=anthropic:claude-sonnet-4-6
13
+ # PRIVEIL_JUDGE_MODEL=openai:gpt-4o
14
+ #
15
+ # Custom OpenAI-compatible endpoint (Databricks, Azure AI, Ollama, etc.):
16
+ # PRIVEIL_JUDGE_MODEL=<deployment-name> # model/deployment name on the endpoint
17
+ # PRIVEIL_JUDGE_BASE_URL=https://<workspace>.azuredatabricks.net/serving-endpoints
18
+ # PRIVEIL_JUDGE_API_KEY=<personal-access-token>
19
+ PRIVEIL_JUDGE_MODEL=anthropic:claude-sonnet-4-6
20
+ # PRIVEIL_JUDGE_BASE_URL=
21
+ # PRIVEIL_JUDGE_API_KEY=
22
+
23
+ # Sampling temperature for the judge (0.0 = deterministic).
24
+ PRIVEIL_JUDGE_TEMPERATURE=0.0
25
+
26
+ # ── Provider API keys ────────────────────────────────────────────────────────
27
+ # Required when PRIVEIL_JUDGE_MODEL uses the anthropic provider.
28
+ ANTHROPIC_API_KEY=your-anthropic-api-key-here
29
+
30
+ # Required when PRIVEIL_JUDGE_MODEL uses the openai provider.
31
+ # OPENAI_API_KEY=your-openai-api-key-here
32
+
33
+ # ── spaCy model ──────────────────────────────────────────────────────────────
34
+ # en_core_web_sm — small, fast, used in CI and tests (default)
35
+ # en_core_web_lg — larger, more accurate NER, recommended for production
36
+ # Download: uv run python -m spacy download en_core_web_lg
37
+ PRIVEIL_SPACY_MODEL=en_core_web_sm
38
+
39
+ # ── Server ───────────────────────────────────────────────────────────────────
40
+ PRIVEIL_DEBUG=false
41
+
42
+ # Thread-pool workers for presidio (CPU-bound). Rule of thumb: number of cores.
43
+ PRIVEIL_EXECUTOR_MAX_WORKERS=4
@@ -0,0 +1,18 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ groups:
8
+ python-packages:
9
+ patterns:
10
+ - "*"
11
+ - package-ecosystem: "github-actions"
12
+ directory: "/"
13
+ schedule:
14
+ interval: "weekly"
15
+ groups:
16
+ github-actions:
17
+ patterns:
18
+ - "*"
@@ -0,0 +1,51 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ name: Lint & type-check
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v7
15
+
16
+ - name: Install uv
17
+ uses: astral-sh/setup-uv@v7
18
+ with:
19
+ enable-cache: true
20
+ python-version-file: ".python-version"
21
+
22
+ - name: Install dependencies
23
+ run: uv sync
24
+
25
+ - name: Ruff
26
+ run: uv run ruff check src/ tests/
27
+
28
+ - name: Mypy
29
+ run: uv run mypy src/
30
+
31
+ test:
32
+ name: Tests (Python ${{ matrix.python-version }})
33
+ runs-on: ubuntu-latest
34
+ strategy:
35
+ fail-fast: false
36
+ matrix:
37
+ python-version: ["3.11", "3.12", "3.13"]
38
+
39
+ steps:
40
+ - uses: actions/checkout@v7
41
+
42
+ - name: Build test image
43
+ run: |
44
+ docker build \
45
+ --target test \
46
+ --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
47
+ --tag priveil-test:${{ matrix.python-version }} \
48
+ .
49
+
50
+ - name: Run tests
51
+ run: docker run --rm priveil-test:${{ matrix.python-version }}
@@ -0,0 +1,35 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ publish-to-pypi:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ id-token: write # required for OIDC trusted publishing
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - name: Set up Python
15
+ uses: actions/setup-python@v5
16
+ with:
17
+ python-version: '3.x'
18
+ - name: Set up uv
19
+ uses: astral-sh/setup-uv@v5
20
+ - name: Build and publish
21
+ run: |
22
+ uv build
23
+ uv publish --trusted-publishing always
24
+ deploy-docs:
25
+ runs-on: ubuntu-latest
26
+ needs:
27
+ - publish-to-pypi
28
+ steps:
29
+ - uses: actions/checkout@v4
30
+ - uses: actions/setup-python@v5
31
+ with:
32
+ python-version: 3.x
33
+ - uses: astral-sh/setup-uv@v5
34
+ - run: make install && make install-docs
35
+ - run: uv run --group docs mkdocs gh-deploy --force
@@ -0,0 +1,15 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .mypy_cache/
8
+ .ruff_cache/
9
+ .pytest_cache/
10
+ .hypothesis/
11
+ *.coverage
12
+ htmlcov/
13
+ .env
14
+ .env.*
15
+ !.env.example
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,38 @@
1
+ ARG PYTHON_VERSION=3.12
2
+
3
+ # ── base: production dependency install ───────────────────────────────────────
4
+ FROM python:${PYTHON_VERSION}-slim AS base
5
+
6
+ COPY --from=ghcr.io/astral-sh/uv:0.11.24 /uv /usr/local/bin/uv
7
+
8
+ ENV UV_SYSTEM_PYTHON=1
9
+
10
+ WORKDIR /app
11
+
12
+ # Dependencies before source for layer caching
13
+ COPY pyproject.toml uv.lock ./
14
+ # uv only includes the dev group by default; models must be explicit.
15
+ RUN uv sync --frozen --no-dev --group models --no-cache
16
+
17
+ COPY src/ ./src/
18
+ RUN uv pip install --no-deps . --no-cache-dir
19
+
20
+ # ── runtime ───────────────────────────────────────────────────────────────────
21
+ FROM base AS runtime
22
+
23
+ # Production deployments use en_core_web_lg; download at deploy time via:
24
+ # PRIVEIL_SPACY_MODEL=en_core_web_lg python -m spacy download en_core_web_lg
25
+ # or bake into a derived image.
26
+ EXPOSE 8000
27
+
28
+ CMD ["uv", "run", "uvicorn", "priveil.app:app", "--host", "0.0.0.0", "--port", "8000"]
29
+
30
+ # ── test ──────────────────────────────────────────────────────────────────────
31
+ FROM base AS test
32
+
33
+ # --all-groups includes dev + models (en-core-web-sm) in one step.
34
+ RUN uv sync --frozen --all-groups --no-cache
35
+
36
+ COPY tests/ ./tests/
37
+
38
+ CMD ["uv", "run", "pytest", "tests/", "-v"]
priveil-0.1.0/Makefile ADDED
@@ -0,0 +1,35 @@
1
+ .DEFAULT_GOAL := help
2
+
3
+ .PHONY: help install install-docs serve test lint format docker-build docker-serve docker-test
4
+
5
+ help:
6
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
7
+
8
+ install: ## Install all dependencies
9
+ uv sync --all-groups
10
+
11
+ install-docs: ## Install docs dependencies
12
+ uv sync --group docs
13
+
14
+ serve: ## Run the server locally with hot-reload
15
+ uv run uvicorn priveil.app:app --reload --host 0.0.0.0 --port 8000
16
+
17
+ test: ## Run tests locally
18
+ uv run pytest tests/ -v
19
+
20
+ lint: ## Lint and type-check
21
+ uv run ruff check src/ tests/
22
+ uv run mypy src/
23
+
24
+ format: ## Auto-fix lint issues
25
+ uv run ruff format src/ tests/
26
+ uv run ruff check --fix src/ tests/
27
+
28
+ docker-build: ## Build all Docker images
29
+ docker compose build
30
+
31
+ docker-serve: ## Run the service via Docker
32
+ docker compose up api
33
+
34
+ docker-test: ## Run tests via Docker
35
+ docker compose run --rm test
priveil-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: priveil
3
+ Version: 0.1.0
4
+ Summary: Pseudonymisation service for Australian financial services
5
+ Author-email: Mitchell Lisle <m.lisle90@gmail.com>
6
+ Requires-Python: <4.0,>=3.11
7
+ Requires-Dist: fastapi>=0.138.1
8
+ Requires-Dist: presidio-analyzer>=2.2.362
9
+ Requires-Dist: presidio-anonymizer>=2.2.362
10
+ Requires-Dist: pydantic-ai>=2.0.0
11
+ Requires-Dist: pydantic-settings<3.0,>=2.14.2
12
+ Requires-Dist: pydantic<3.0,>=2.13.4
13
+ Requires-Dist: spacy>=3.8.14
14
+ Requires-Dist: uvicorn[standard]>=0.49.0