priveil 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- priveil-0.1.0/.cursor/skills/conduit-core.md +54 -0
- priveil-0.1.0/.cursor/skills/conduit-py.md +50 -0
- priveil-0.1.0/.cursor/skills/domain-doc.md +70 -0
- priveil-0.1.0/.cursor/skills/ship-and-watch.md +86 -0
- priveil-0.1.0/.cursor/skills/vertical-slices.md +65 -0
- priveil-0.1.0/.dockerignore +17 -0
- priveil-0.1.0/.env.example +43 -0
- priveil-0.1.0/.github/dependabot.yml +18 -0
- priveil-0.1.0/.github/workflows/ci.yml +51 -0
- priveil-0.1.0/.github/workflows/publish.yml +35 -0
- priveil-0.1.0/.gitignore +15 -0
- priveil-0.1.0/.python-version +1 -0
- priveil-0.1.0/Dockerfile +38 -0
- priveil-0.1.0/Makefile +35 -0
- priveil-0.1.0/PKG-INFO +14 -0
- priveil-0.1.0/README.md +258 -0
- priveil-0.1.0/bruno/priveil-api/01-health.bru +15 -0
- priveil-0.1.0/bruno/priveil-api/02-detect-accurate.bru +24 -0
- priveil-0.1.0/bruno/priveil-api/03-detect-fast.bru +24 -0
- priveil-0.1.0/bruno/priveil-api/04-detect-empty-text-422.bru +23 -0
- priveil-0.1.0/bruno/priveil-api/05-detect-unsupported-language-422.bru +23 -0
- priveil-0.1.0/bruno/priveil-api/06-pseudonymise-accurate.bru +23 -0
- priveil-0.1.0/bruno/priveil-api/07-pseudonymise-fast.bru +23 -0
- priveil-0.1.0/bruno/priveil-api/08-pseudonymise-with-detections.bru +37 -0
- priveil-0.1.0/bruno/priveil-api/09-pseudonymise-operator-overrides.bru +27 -0
- priveil-0.1.0/bruno/priveil-api/10-assess-auto-detect.bru +23 -0
- priveil-0.1.0/bruno/priveil-api/11-assess-with-detections.bru +37 -0
- priveil-0.1.0/bruno/priveil-api/12-assess-low-risk.bru +23 -0
- priveil-0.1.0/bruno/priveil-api/13-assess-not-configured-503.bru +36 -0
- priveil-0.1.0/bruno/priveil-api/14-assess-empty-text-422.bru +22 -0
- priveil-0.1.0/bruno/priveil-api/bruno.json +5 -0
- priveil-0.1.0/bruno/priveil-api/environments/local.bru +3 -0
- priveil-0.1.0/docker-compose.yml +15 -0
- priveil-0.1.0/docs/domain/priveil.md +225 -0
- priveil-0.1.0/pyproject.toml +66 -0
- priveil-0.1.0/src/priveil/__init__.py +0 -0
- priveil-0.1.0/src/priveil/api/__init__.py +0 -0
- priveil-0.1.0/src/priveil/api/deps.py +43 -0
- priveil-0.1.0/src/priveil/api/routes/__init__.py +0 -0
- priveil-0.1.0/src/priveil/api/routes/assess.py +28 -0
- priveil-0.1.0/src/priveil/api/routes/detect.py +27 -0
- priveil-0.1.0/src/priveil/api/routes/health.py +14 -0
- priveil-0.1.0/src/priveil/api/routes/pseudonymise.py +37 -0
- priveil-0.1.0/src/priveil/app.py +75 -0
- priveil-0.1.0/src/priveil/domain/__init__.py +0 -0
- priveil-0.1.0/src/priveil/domain/assessment.py +44 -0
- priveil-0.1.0/src/priveil/domain/detection.py +45 -0
- priveil-0.1.0/src/priveil/domain/entities.py +69 -0
- priveil-0.1.0/src/priveil/domain/judgement.py +23 -0
- priveil-0.1.0/src/priveil/domain/pseudonymisation.py +46 -0
- priveil-0.1.0/src/priveil/engine/__init__.py +0 -0
- priveil-0.1.0/src/priveil/engine/analyser.py +99 -0
- priveil-0.1.0/src/priveil/engine/pseudonymiser.py +145 -0
- priveil-0.1.0/src/priveil/judge/__init__.py +0 -0
- priveil-0.1.0/src/priveil/judge/assessor.py +122 -0
- priveil-0.1.0/src/priveil/judge/model.py +82 -0
- priveil-0.1.0/src/priveil/judge/prompts/assessor.md +34 -0
- priveil-0.1.0/src/priveil/judge/prompts/refiner.md +19 -0
- priveil-0.1.0/src/priveil/judge/refiner.py +144 -0
- priveil-0.1.0/src/priveil/recognisers/__init__.py +0 -0
- priveil-0.1.0/src/priveil/recognisers/au_abn.py +46 -0
- priveil-0.1.0/src/priveil/recognisers/au_acn.py +45 -0
- priveil-0.1.0/src/priveil/recognisers/au_bsb.py +21 -0
- priveil-0.1.0/src/priveil/recognisers/au_medicare.py +45 -0
- priveil-0.1.0/src/priveil/recognisers/au_phone.py +30 -0
- priveil-0.1.0/src/priveil/recognisers/au_tfn.py +48 -0
- priveil-0.1.0/src/priveil/recognisers/registry.py +27 -0
- priveil-0.1.0/src/priveil/settings.py +30 -0
- priveil-0.1.0/tests/__init__.py +0 -0
- priveil-0.1.0/tests/conftest.py +119 -0
- priveil-0.1.0/tests/integration/__init__.py +0 -0
- priveil-0.1.0/tests/integration/test_assess.py +86 -0
- priveil-0.1.0/tests/integration/test_au_detect.py +79 -0
- priveil-0.1.0/tests/integration/test_detect.py +64 -0
- priveil-0.1.0/tests/integration/test_health.py +12 -0
- priveil-0.1.0/tests/integration/test_pseudonymise.py +77 -0
- priveil-0.1.0/tests/integration/test_refine.py +65 -0
- priveil-0.1.0/tests/unit/__init__.py +0 -0
- priveil-0.1.0/tests/unit/test_analyser.py +79 -0
- priveil-0.1.0/tests/unit/test_assessor.py +89 -0
- priveil-0.1.0/tests/unit/test_entities.py +41 -0
- priveil-0.1.0/tests/unit/test_judge_model.py +93 -0
- priveil-0.1.0/tests/unit/test_pseudonymiser.py +186 -0
- priveil-0.1.0/tests/unit/test_recognisers.py +146 -0
- priveil-0.1.0/tests/unit/test_refiner.py +130 -0
- priveil-0.1.0/uv.lock +3833 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: conduit-core
|
|
3
|
+
description: >
|
|
4
|
+
Forces the laziest clean pipeline that actually works. Question whether the
|
|
5
|
+
transform needs to exist at all, reach for standard and internal libraries
|
|
6
|
+
before writing anything new, validate at every trust boundary, write pure
|
|
7
|
+
functions that compose, document contracts, and treat security as
|
|
8
|
+
load-bearing. This is the language-agnostic core. Pair it with conduit-py
|
|
9
|
+
for concrete tooling.
|
|
10
|
+
argument-hint: "[lite|full|ultra]"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Conduit — Core
|
|
14
|
+
|
|
15
|
+
Lazy means efficient, not careless. The best code is the code never written.
|
|
16
|
+
Data flows in from hostile territory, gets validated, passes through the
|
|
17
|
+
minimum pure transforms required, and exits clean. Security is load-bearing,
|
|
18
|
+
not decorative. Types are documentation. The schema is the gate.
|
|
19
|
+
|
|
20
|
+
## The Ladder
|
|
21
|
+
|
|
22
|
+
Stop at the first rung that holds:
|
|
23
|
+
|
|
24
|
+
1. **Does this need to exist at all?** Speculative transform = skip it. (YAGNI)
|
|
25
|
+
2. **Does an internal library already do it?** Use it.
|
|
26
|
+
3. **Does the standard library do it?** Reach for it before any custom logic.
|
|
27
|
+
4. **Does this data cross a serialization boundary?** Give it a schema.
|
|
28
|
+
5. **Is this a transformation?** Pure function. Input → output, no side effects.
|
|
29
|
+
6. **Can it be a pipeline?** Compose. One function per concern.
|
|
30
|
+
7. **Does it cross a trust boundary?** Validate in, sanitize out, log the action (never the secret).
|
|
31
|
+
8. **Is the contract documented?** Docstring — one-line summary, args, return, errors.
|
|
32
|
+
9. **Only then:** write the minimum implementation that works.
|
|
33
|
+
|
|
34
|
+
## Rules
|
|
35
|
+
|
|
36
|
+
**Laziness**
|
|
37
|
+
- No unrequested abstractions.
|
|
38
|
+
- Deletion over addition. Shortest working diff wins.
|
|
39
|
+
- `conduit:` comments mark deliberate simplifications — name the ceiling and the upgrade path.
|
|
40
|
+
|
|
41
|
+
**Security Champion**
|
|
42
|
+
- All external data is hostile until a parsed schema says otherwise.
|
|
43
|
+
- No secrets in logs, no secrets in code. Secrets come from the environment.
|
|
44
|
+
- Parameterised queries only — never build SQL or shell commands by string interpolation.
|
|
45
|
+
- Least privilege: functions receive only the data they need.
|
|
46
|
+
- On bad input: reject loudly with a typed error, never silently coerce.
|
|
47
|
+
|
|
48
|
+
## Intensity
|
|
49
|
+
|
|
50
|
+
| Level | What changes |
|
|
51
|
+
|-------|-------------|
|
|
52
|
+
| **lite** | Build what's asked with type hints and a docstring. |
|
|
53
|
+
| **full** | Ladder enforced. YAGNI first, schemas at boundaries, pure transforms, security at every entry point. Default. |
|
|
54
|
+
| **ultra** | YAGNI extremist. Delete before adding. Challenge the requirement before writing a line. |
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: conduit-py
|
|
3
|
+
description: >
|
|
4
|
+
Python tooling for the conduit discipline — Pydantic at every serialization
|
|
5
|
+
boundary, pure composable functions, stdlib-first functional style,
|
|
6
|
+
Google-style docstrings, and a pytest + hypothesis test stack.
|
|
7
|
+
Read conduit-core for the ladder, philosophy, and security principles.
|
|
8
|
+
argument-hint: "[lite|full|ultra]"
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
> Read `conduit-core` before this file. This skill adds Python-specific tooling.
|
|
12
|
+
|
|
13
|
+
# Conduit — Python
|
|
14
|
+
|
|
15
|
+
## Data & Types
|
|
16
|
+
|
|
17
|
+
- Pydantic models at every serialization boundary: API inputs, env vars (`BaseSettings`), external service responses.
|
|
18
|
+
- Never reach into deserialized JSON with `.get()` chains when a model exists. Use `Model.model_validate(data)`.
|
|
19
|
+
- Type hints on every function signature, return type included.
|
|
20
|
+
- Immutable by default: frozen Pydantic models, tuples over lists where mutation adds nothing.
|
|
21
|
+
- Never use `Any` without `# conduit: Any here because [reason]`.
|
|
22
|
+
|
|
23
|
+
## Functional Style
|
|
24
|
+
|
|
25
|
+
- Pure functions are the default. Same input → same output, always.
|
|
26
|
+
- Small, composable units. One responsibility each.
|
|
27
|
+
- Generator pipelines for large data: never load what you can stream.
|
|
28
|
+
- `functools` first: `partial`, `reduce`, `lru_cache`.
|
|
29
|
+
- No mutable default arguments. Ever.
|
|
30
|
+
|
|
31
|
+
## Security Champion (Python surface)
|
|
32
|
+
|
|
33
|
+
- No secrets in logs, no secrets in code. Env vars via `BaseSettings`.
|
|
34
|
+
- Secret variables wrapped in Pydantic's `SecretStr` or `Secret[T]`.
|
|
35
|
+
- Parameterised queries only. No f-strings into SQL or shell commands.
|
|
36
|
+
- On bad input: reject loudly with a clear `ValueError` or `ValidationError`, never silently coerce.
|
|
37
|
+
|
|
38
|
+
## Documentation
|
|
39
|
+
|
|
40
|
+
- Google-style docstrings on every non-trivial function.
|
|
41
|
+
- One-line imperative summary (`Validate and parse...`, `Transform...`).
|
|
42
|
+
- Args / Returns / Raises — one short line each, only what isn't obvious from the type.
|
|
43
|
+
- Never document what the type signature already says.
|
|
44
|
+
|
|
45
|
+
## Test stack
|
|
46
|
+
|
|
47
|
+
- `pytest` for all test running. No `unittest`.
|
|
48
|
+
- `hypothesis` for data edge cases: null values, empty collections, out-of-range values, schema surprises.
|
|
49
|
+
- `asyncio_mode = "auto"` in pytest config; use `async def test_` directly.
|
|
50
|
+
- No mocks of internal logic — test real behaviour with real (small) data.
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Domain Doc
|
|
2
|
+
|
|
3
|
+
You are a Domain-Driven Design facilitator helping a data engineering or data platform team build out their domain documentation. Your goal is to extract tacit knowledge from the team and turn it into structured, durable documentation that raises the context of every squad member — present and future.
|
|
4
|
+
|
|
5
|
+
## When to use this skill
|
|
6
|
+
|
|
7
|
+
- A new domain or subdomain is being defined
|
|
8
|
+
- A team is starting a new service, pipeline, or platform capability
|
|
9
|
+
- An existing area lacks documentation and context is siloed in people's heads
|
|
10
|
+
- Onboarding new engineers who need to understand the domain fast
|
|
11
|
+
|
|
12
|
+
## How to run a domain doc session
|
|
13
|
+
|
|
14
|
+
### Step 1 — Establish scope
|
|
15
|
+
|
|
16
|
+
Ask the user: "What domain or subdomain are we documenting? Give me one sentence on what it does and who it serves."
|
|
17
|
+
|
|
18
|
+
Wait for their answer before proceeding.
|
|
19
|
+
|
|
20
|
+
### Step 2 — Extract the ubiquitous language
|
|
21
|
+
|
|
22
|
+
Work through these questions one at a time (don't dump them all at once):
|
|
23
|
+
|
|
24
|
+
1. "What are the core nouns in this domain — the things you work with every day? List them out, don't worry about definitions yet."
|
|
25
|
+
2. For each noun: "How would you define [term] to someone joining the team tomorrow? Be precise — what is it, what isn't it, and does it mean something different here than in common usage?"
|
|
26
|
+
3. "Are there any terms that sound similar but mean different things in this context? Or terms outsiders use differently to how you use them?"
|
|
27
|
+
|
|
28
|
+
Document each term as you go in this format:
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
**[Term]**
|
|
32
|
+
Definition: [precise definition]
|
|
33
|
+
Alias / external term: [if different outside this team]
|
|
34
|
+
Not to be confused with: [if there's a common mix-up]
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Step 3 — Map bounded contexts
|
|
38
|
+
|
|
39
|
+
Ask:
|
|
40
|
+
- "Does this domain have clear subdomains — areas that could almost stand alone? What are they?"
|
|
41
|
+
- "Where are the edges? What does this domain own vs. depend on from elsewhere?"
|
|
42
|
+
- "What are the integration points — where does data or control flow in or out?"
|
|
43
|
+
|
|
44
|
+
### Step 4 — Capture domain events
|
|
45
|
+
|
|
46
|
+
Ask:
|
|
47
|
+
- "What are the key things that *happen* in this domain? Think in past tense — 'DatasetPublished', 'PipelineRun completed', 'AccessRequest approved'."
|
|
48
|
+
- "Which of these events trigger something else downstream?"
|
|
49
|
+
|
|
50
|
+
### Step 5 — Decisions and constraints
|
|
51
|
+
|
|
52
|
+
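Ask: "What key decisions or constraints (technical, privacy, security, compliance) have shaped this domain?" Document each one as an Architecture Decision Record (ADR) stub: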
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
**Decision: [title]**
|
|
56
|
+
Context: [why this came up]
|
|
57
|
+
Decision: [what was decided]
|
|
58
|
+
Consequences: [what this means for the domain]
|
|
59
|
+
Constraints: [privacy/security/compliance if relevant]
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Step 6 — Goals and success
|
|
63
|
+
|
|
64
|
+
Ask:
|
|
65
|
+
- "What does 'good' look like for this domain? What are you optimising for?"
|
|
66
|
+
- "How do you know when this domain is working well vs. struggling?"
|
|
67
|
+
|
|
68
|
+
### Step 7 — Produce the output
|
|
69
|
+
|
|
70
|
+
Assemble everything into a structured markdown document saved at `docs/domain/[domain-name].md`.
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ship-and-watch
|
|
3
|
+
description: >
|
|
4
|
+
Opens a pull request with the GitHub CLI, then watches it to completion.
|
|
5
|
+
Polls CI checks, review approval, and review-thread resolution on a loop
|
|
6
|
+
until every gate is green. Gates on unresolved review threads — not just
|
|
7
|
+
CHANGES_REQUESTED. Merges only when all three gates are green, then writes
|
|
8
|
+
a summary. Use when asked to "ship and watch", "ship it", "raise the PR and
|
|
9
|
+
merge when green", or to create, monitor, and merge a pull request.
|
|
10
|
+
argument-hint: "[base-branch]"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Ship and Watch
|
|
14
|
+
|
|
15
|
+
A PR is mergeable only when **all three gates** are satisfied:
|
|
16
|
+
|
|
17
|
+
1. **Checks** — every required CI run has passed.
|
|
18
|
+
2. **Approval** — at least one approving review.
|
|
19
|
+
3. **Comments resolved** — every review thread is resolved (use GraphQL, not `reviewDecision`).
|
|
20
|
+
|
|
21
|
+
## Step 0 — Pre-flight
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
gh auth status
|
|
25
|
+
git rev-parse --abbrev-ref HEAD # must be a feature branch, not base
|
|
26
|
+
git status --porcelain # must be clean
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Push if needed: `git push -u origin HEAD`
|
|
30
|
+
|
|
31
|
+
## Step 1 — Open the PR
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
git log --oneline "$(gh repo view --json defaultBranchRef -q .defaultBranchRef.name)..HEAD"
|
|
35
|
+
gh pr create --base "<base>" --head "$(git rev-parse --abbrev-ref HEAD)" \
|
|
36
|
+
--title "<imperative title>" --body "<what changed and why>"
|
|
37
|
+
PR=$(gh pr view --json number -q .number)
|
|
38
|
+
OPEN_SHA=$(git rev-parse HEAD)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Step 2 — Watch loop (poll all three gates each round)
|
|
42
|
+
|
|
43
|
+
**Gate A — Checks:** `gh pr checks "$PR"` — exit 0 = pass, exit 8 = pending, exit 1 = STOP.
|
|
44
|
+
|
|
45
|
+
**Gate B — Approval:** read `reviewDecision` from `gh pr view "$PR" --json reviewDecision`.
|
|
46
|
+
- `APPROVED` → green. `null` → ask user before merging. `REVIEW_REQUIRED` / `CHANGES_REQUESTED` → keep polling.
|
|
47
|
+
|
|
48
|
+
**Gate C — Unresolved threads (GraphQL):**
|
|
49
|
+
```bash
|
|
50
|
+
gh api graphql -F owner='<owner>' -F repo='<repo>' -F pr="$PR" -f query='
|
|
51
|
+
query($owner:String!, $repo:String!, $pr:Int!) {
|
|
52
|
+
repository(owner:$owner, name:$repo) {
|
|
53
|
+
pullRequest(number:$pr) {
|
|
54
|
+
reviewThreads(first:100) {
|
|
55
|
+
nodes { isResolved isOutdated path comments(first:1) { nodes { author { login } body } } }
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}' | jq '[.data.repository.pullRequest.reviewThreads.nodes[] | select(.isResolved==false)] | length'
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Merge only when count == 0. Never assume `isOutdated == true` means resolved — confirm the fix, then resolve.
|
|
63
|
+
|
|
64
|
+
## Step 3 — Merge
|
|
65
|
+
|
|
66
|
+
When A + C green and `mergeable != CONFLICTING`:
|
|
67
|
+
- `APPROVED` → `gh pr merge "$PR" --squash --delete-branch`
|
|
68
|
+
- `null` → prompt user first; wait for explicit yes.
|
|
69
|
+
|
|
70
|
+
## Step 4 — Ship report
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
git log --oneline "$OPEN_SHA"..HEAD
|
|
74
|
+
git diff --stat "$OPEN_SHA"..HEAD
|
|
75
|
+
gh pr view "$PR" --json reviews,comments
|
|
76
|
+
gh api "repos/<owner>/<repo>/pulls/$PR/comments"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Report: what changed after the PR opened, each piece of feedback and its resolution, final gate status.
|
|
80
|
+
|
|
81
|
+
## Hard stops
|
|
82
|
+
|
|
83
|
+
- Gate A exit 1 (check failed).
|
|
84
|
+
- Gate C count > 0 (unresolved threads).
|
|
85
|
+
- `mergeable == CONFLICTING`.
|
|
86
|
+
- 20 polling rounds elapsed — report and hand back.
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: vertical-slices
|
|
3
|
+
description: >
|
|
4
|
+
Guidance for breaking features into tracer bullet vertical slices. Use when
|
|
5
|
+
designing new features, epics, or initiative plans for this service. Each
|
|
6
|
+
slice cuts through ALL integration layers end-to-end.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Vertical Slices
|
|
10
|
+
|
|
11
|
+
A vertical slice is a thin end-to-end cut through every layer of the system —
|
|
12
|
+
schema, engine, API, tests. It is NOT a horizontal layer (e.g. "add all domain
|
|
13
|
+
models first").
|
|
14
|
+
|
|
15
|
+
## Rules
|
|
16
|
+
|
|
17
|
+
- **A completed slice is demo-able or verifiable on its own** — no sibling slice needed.
|
|
18
|
+
- **Each slice delivers a narrow but COMPLETE path** — no half-implemented schemas, no skipped tests.
|
|
19
|
+
- **Prefactoring ships first** as Slice 0: scaffold, shared models, engine wiring.
|
|
20
|
+
- **A new slice NEVER modifies a prior slice's public contract** — extend, don't break.
|
|
21
|
+
- **LLM / AI paths are always additive** — core deterministic paths never depend on them.
|
|
22
|
+
|
|
23
|
+
## Shape of a good slice
|
|
24
|
+
|
|
25
|
+
| Layer | Must include |
|
|
26
|
+
|--------|-------------------------------------------------|
|
|
27
|
+
| Schema | Pydantic request + response models |
|
|
28
|
+
| Engine | Business logic / service layer |
|
|
29
|
+
| API | FastAPI route wired end-to-end |
|
|
30
|
+
| Tests | ≥1 unit test + ≥1 integration test via ASGI client |
|
|
31
|
+
|
|
32
|
+
## Anti-patterns
|
|
33
|
+
|
|
34
|
+
- "Add all the domain models" — horizontal slice, not vertical.
|
|
35
|
+
- "Wire up the engine layer" — same problem.
|
|
36
|
+
- A slice that cannot be verified without a later slice being complete.
|
|
37
|
+
- An API route without a test.
|
|
38
|
+
|
|
39
|
+
## Process
|
|
40
|
+
|
|
41
|
+
1. Identify the narrowest path that delivers value.
|
|
42
|
+
2. Name it as an imperative user capability: "Detect entities in text".
|
|
43
|
+
3. List the layers it touches (schema → engine → API → test).
|
|
44
|
+
4. Write the acceptance criteria as observable outputs, not internal state.
|
|
45
|
+
5. Implement each layer in order; run the test before calling the slice done.
|
|
46
|
+
|
|
47
|
+
## Build order for this service
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
Slice 0: Scaffold (prerequisite for everything)
|
|
51
|
+
↓
|
|
52
|
+
Slice 1: Text entity detection → POST /detect
|
|
53
|
+
↓
|
|
54
|
+
Slice 2: AU financial recognisers (extends Slice 1 entities)
|
|
55
|
+
↓
|
|
56
|
+
Slice 3: Anonymisation → POST /anonymise
|
|
57
|
+
↓
|
|
58
|
+
Slice 5: LLM judge (non-streaming) → POST /judge
|
|
59
|
+
↓
|
|
60
|
+
Slice 6: Streaming judge → POST /judge/stream
|
|
61
|
+
|
|
62
|
+
(Slice 4 — image detection — deferred; see backlog)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Each arrow = hard dependency. Slices at the same level run in parallel.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copy this file to .env and fill in real values.
|
|
2
|
+
# All variables are prefixed with PRIVEIL_ except provider API keys.
|
|
3
|
+
|
|
4
|
+
# ── LLM judge (optional) ─────────────────────────────────────────────────────
|
|
5
|
+
# Enables two things:
|
|
6
|
+
# 1. Judge mode on POST /detect and POST /pseudonymise (mode='judge', default)
|
|
7
|
+
# 2. POST /assess — content risk and sensitivity assessment
|
|
8
|
+
#
|
|
9
|
+
# When unset: mode='judge' degrades silently to mode='fast'; /assess returns 503.
|
|
10
|
+
#
|
|
11
|
+
# Built-in providers — use provider:model format:
|
|
12
|
+
# PRIVEIL_JUDGE_MODEL=anthropic:claude-sonnet-4-6
|
|
13
|
+
# PRIVEIL_JUDGE_MODEL=openai:gpt-4o
|
|
14
|
+
#
|
|
15
|
+
# Custom OpenAI-compatible endpoint (Databricks, Azure AI, Ollama, etc.):
|
|
16
|
+
# PRIVEIL_JUDGE_MODEL=<deployment-name> # model/deployment name on the endpoint
|
|
17
|
+
# PRIVEIL_JUDGE_BASE_URL=https://<workspace>.azuredatabricks.net/serving-endpoints
|
|
18
|
+
# PRIVEIL_JUDGE_API_KEY=<personal-access-token>
|
|
19
|
+
PRIVEIL_JUDGE_MODEL=anthropic:claude-sonnet-4-6
|
|
20
|
+
# PRIVEIL_JUDGE_BASE_URL=
|
|
21
|
+
# PRIVEIL_JUDGE_API_KEY=
|
|
22
|
+
|
|
23
|
+
# Sampling temperature for the judge (0.0 = deterministic).
|
|
24
|
+
PRIVEIL_JUDGE_TEMPERATURE=0.0
|
|
25
|
+
|
|
26
|
+
# ── Provider API keys ────────────────────────────────────────────────────────
|
|
27
|
+
# Required when PRIVEIL_JUDGE_MODEL uses the anthropic provider.
|
|
28
|
+
ANTHROPIC_API_KEY=your-anthropic-api-key-here
|
|
29
|
+
|
|
30
|
+
# Required when PRIVEIL_JUDGE_MODEL uses the openai provider.
|
|
31
|
+
# OPENAI_API_KEY=your-openai-api-key-here
|
|
32
|
+
|
|
33
|
+
# ── spaCy model ──────────────────────────────────────────────────────────────
|
|
34
|
+
# en_core_web_sm — small, fast, used in CI and tests (default)
|
|
35
|
+
# en_core_web_lg — larger, more accurate NER, recommended for production
|
|
36
|
+
# Download: uv run python -m spacy download en_core_web_lg
|
|
37
|
+
PRIVEIL_SPACY_MODEL=en_core_web_sm
|
|
38
|
+
|
|
39
|
+
# ── Server ───────────────────────────────────────────────────────────────────
|
|
40
|
+
PRIVEIL_DEBUG=false
|
|
41
|
+
|
|
42
|
+
# Thread-pool workers for Presidio (CPU-bound). Rule of thumb: number of cores.
|
|
43
|
+
PRIVEIL_EXECUTOR_MAX_WORKERS=4
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
- package-ecosystem: "pip"
|
|
4
|
+
directory: "/"
|
|
5
|
+
schedule:
|
|
6
|
+
interval: "weekly"
|
|
7
|
+
groups:
|
|
8
|
+
python-packages:
|
|
9
|
+
patterns:
|
|
10
|
+
- "*"
|
|
11
|
+
- package-ecosystem: "github-actions"
|
|
12
|
+
directory: "/"
|
|
13
|
+
schedule:
|
|
14
|
+
interval: "weekly"
|
|
15
|
+
groups:
|
|
16
|
+
github-actions:
|
|
17
|
+
patterns:
|
|
18
|
+
- "*"
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
name: Lint & type-check
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v7
|
|
15
|
+
|
|
16
|
+
- name: Install uv
|
|
17
|
+
uses: astral-sh/setup-uv@v7
|
|
18
|
+
with:
|
|
19
|
+
enable-cache: true
|
|
20
|
+
python-version-file: ".python-version"
|
|
21
|
+
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: uv sync
|
|
24
|
+
|
|
25
|
+
- name: Ruff
|
|
26
|
+
run: uv run ruff check src/ tests/
|
|
27
|
+
|
|
28
|
+
- name: Mypy
|
|
29
|
+
run: uv run mypy src/
|
|
30
|
+
|
|
31
|
+
test:
|
|
32
|
+
name: Tests (Python ${{ matrix.python-version }})
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
strategy:
|
|
35
|
+
fail-fast: false
|
|
36
|
+
matrix:
|
|
37
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
38
|
+
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v7
|
|
41
|
+
|
|
42
|
+
- name: Build test image
|
|
43
|
+
run: |
|
|
44
|
+
docker build \
|
|
45
|
+
--target test \
|
|
46
|
+
--build-arg PYTHON_VERSION=${{ matrix.python-version }} \
|
|
47
|
+
--tag priveil-test:${{ matrix.python-version }} \
|
|
48
|
+
.
|
|
49
|
+
|
|
50
|
+
- name: Run tests
|
|
51
|
+
run: docker run --rm priveil-test:${{ matrix.python-version }}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish-to-pypi:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
id-token: write # required for OIDC trusted publishing
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- name: Set up Python
|
|
15
|
+
uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: '3.x'
|
|
18
|
+
- name: Set up uv
|
|
19
|
+
uses: astral-sh/setup-uv@v5
|
|
20
|
+
- name: Build and publish
|
|
21
|
+
run: |
|
|
22
|
+
uv build
|
|
23
|
+
uv publish --trusted-publishing always
|
|
24
|
+
deploy-docs:
|
|
25
|
+
runs-on: ubuntu-latest
|
|
26
|
+
needs:
|
|
27
|
+
- publish-to-pypi
|
|
28
|
+
steps:
|
|
29
|
+
- uses: actions/checkout@v4
|
|
30
|
+
- uses: actions/setup-python@v5
|
|
31
|
+
with:
|
|
32
|
+
python-version: 3.x
|
|
33
|
+
- uses: astral-sh/setup-uv@v5
|
|
34
|
+
- run: make install && make install-docs
|
|
35
|
+
- run: uv run --group docs mkdocs gh-deploy --force
|
priveil-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
priveil-0.1.0/Dockerfile
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
ARG PYTHON_VERSION=3.12
|
|
2
|
+
|
|
3
|
+
# ── base: production dependency install ───────────────────────────────────────
|
|
4
|
+
FROM python:${PYTHON_VERSION}-slim AS base
|
|
5
|
+
|
|
6
|
+
COPY --from=ghcr.io/astral-sh/uv:0.11.24 /uv /usr/local/bin/uv
|
|
7
|
+
|
|
8
|
+
ENV UV_SYSTEM_PYTHON=1
|
|
9
|
+
|
|
10
|
+
WORKDIR /app
|
|
11
|
+
|
|
12
|
+
# Dependencies before source for layer caching
|
|
13
|
+
COPY pyproject.toml uv.lock ./
|
|
14
|
+
# uv only includes the dev group by default; models must be explicit.
|
|
15
|
+
RUN uv sync --frozen --no-dev --group models --no-cache
|
|
16
|
+
|
|
17
|
+
COPY src/ ./src/
|
|
18
|
+
RUN uv pip install --no-deps . --no-cache-dir
|
|
19
|
+
|
|
20
|
+
# ── runtime ───────────────────────────────────────────────────────────────────
|
|
21
|
+
FROM base AS runtime
|
|
22
|
+
|
|
23
|
+
# Production deployments use en_core_web_lg; download at deploy time via:
|
|
24
|
+
# python -m spacy download en_core_web_lg (then run with PRIVEIL_SPACY_MODEL=en_core_web_lg)
|
|
25
|
+
# or bake into a derived image.
|
|
26
|
+
EXPOSE 8000
|
|
27
|
+
|
|
28
|
+
CMD ["uv", "run", "uvicorn", "priveil.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
|
29
|
+
|
|
30
|
+
# ── test ──────────────────────────────────────────────────────────────────────
|
|
31
|
+
FROM base AS test
|
|
32
|
+
|
|
33
|
+
# --all-groups includes dev + models (en-core-web-sm) in one step.
|
|
34
|
+
RUN uv sync --frozen --all-groups --no-cache
|
|
35
|
+
|
|
36
|
+
COPY tests/ ./tests/
|
|
37
|
+
|
|
38
|
+
CMD ["uv", "run", "pytest", "tests/", "-v"]
|
priveil-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
.DEFAULT_GOAL := help
|
|
2
|
+
|
|
3
|
+
.PHONY: help install install-docs serve test lint format docker-build docker-serve docker-test
|
|
4
|
+
|
|
5
|
+
help:
|
|
6
|
+
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
|
|
7
|
+
|
|
8
|
+
install: ## Install all dependencies
|
|
9
|
+
uv sync --all-groups
|
|
10
|
+
|
|
11
|
+
install-docs: ## Install docs dependencies
|
|
12
|
+
uv sync --group docs
|
|
13
|
+
|
|
14
|
+
serve: ## Run the server locally with hot-reload
|
|
15
|
+
uv run uvicorn priveil.app:app --reload --host 0.0.0.0 --port 8000
|
|
16
|
+
|
|
17
|
+
test: ## Run tests locally
|
|
18
|
+
uv run pytest tests/ -v
|
|
19
|
+
|
|
20
|
+
lint: ## Lint and type-check
|
|
21
|
+
uv run ruff check src/ tests/
|
|
22
|
+
uv run mypy src/
|
|
23
|
+
|
|
24
|
+
format: ## Auto-fix lint issues
|
|
25
|
+
uv run ruff format src/ tests/
|
|
26
|
+
uv run ruff check --fix src/ tests/
|
|
27
|
+
|
|
28
|
+
docker-build: ## Build all Docker images
|
|
29
|
+
docker compose build
|
|
30
|
+
|
|
31
|
+
docker-serve: ## Run the service via Docker
|
|
32
|
+
docker compose up api
|
|
33
|
+
|
|
34
|
+
docker-test: ## Run tests via Docker
|
|
35
|
+
docker compose run --rm test
|
priveil-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: priveil
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pseudonymisation service for Australian financial services
|
|
5
|
+
Author-email: Mitchell Lisle <m.lisle90@gmail.com>
|
|
6
|
+
Requires-Python: <4.0,>=3.11
|
|
7
|
+
Requires-Dist: fastapi>=0.138.1
|
|
8
|
+
Requires-Dist: presidio-analyzer>=2.2.362
|
|
9
|
+
Requires-Dist: presidio-anonymizer>=2.2.362
|
|
10
|
+
Requires-Dist: pydantic-ai>=2.0.0
|
|
11
|
+
Requires-Dist: pydantic-settings<3.0,>=2.14.2
|
|
12
|
+
Requires-Dist: pydantic<3.0,>=2.13.4
|
|
13
|
+
Requires-Dist: spacy>=3.8.14
|
|
14
|
+
Requires-Dist: uvicorn[standard]>=0.49.0
|