phi-scan 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phi_scan-0.3.0/.gitignore +27 -0
- phi_scan-0.3.0/.phi-scanner.yml +90 -0
- phi_scan-0.3.0/.pre-commit-hooks.yaml +24 -0
- phi_scan-0.3.0/CHANGELOG.md +66 -0
- phi_scan-0.3.0/LICENSE +21 -0
- phi_scan-0.3.0/PKG-INFO +158 -0
- phi_scan-0.3.0/README.md +83 -0
- phi_scan-0.3.0/phi_scan/__init__.py +4 -0
- phi_scan-0.3.0/phi_scan/audit.py +495 -0
- phi_scan-0.3.0/phi_scan/baseline.py +650 -0
- phi_scan-0.3.0/phi_scan/cache.py +499 -0
- phi_scan-0.3.0/phi_scan/cli.py +1722 -0
- phi_scan-0.3.0/phi_scan/compliance.py +1 -0
- phi_scan-0.3.0/phi_scan/config.py +415 -0
- phi_scan-0.3.0/phi_scan/constants.py +700 -0
- phi_scan-0.3.0/phi_scan/detection_coordinator.py +491 -0
- phi_scan-0.3.0/phi_scan/diff.py +213 -0
- phi_scan-0.3.0/phi_scan/exceptions.py +135 -0
- phi_scan-0.3.0/phi_scan/fhir_recognizer.py +296 -0
- phi_scan-0.3.0/phi_scan/fixer.py +757 -0
- phi_scan-0.3.0/phi_scan/hashing.py +80 -0
- phi_scan-0.3.0/phi_scan/help_text.py +364 -0
- phi_scan-0.3.0/phi_scan/hl7_scanner.py +284 -0
- phi_scan-0.3.0/phi_scan/logging_config.py +186 -0
- phi_scan-0.3.0/phi_scan/models.py +525 -0
- phi_scan-0.3.0/phi_scan/nlp_detector.py +336 -0
- phi_scan-0.3.0/phi_scan/notifier.py +1 -0
- phi_scan-0.3.0/phi_scan/output.py +2322 -0
- phi_scan-0.3.0/phi_scan/plugin_api.py +1 -0
- phi_scan-0.3.0/phi_scan/py.typed +0 -0
- phi_scan-0.3.0/phi_scan/regex_detector.py +1334 -0
- phi_scan-0.3.0/phi_scan/report.py +1 -0
- phi_scan-0.3.0/phi_scan/scanner.py +868 -0
- phi_scan-0.3.0/phi_scan/suppression.py +203 -0
- phi_scan-0.3.0/pyproject.toml +91 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Python-generated files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[oc]
|
|
4
|
+
build/
|
|
5
|
+
dist/
|
|
6
|
+
wheels/
|
|
7
|
+
*.egg-info
|
|
8
|
+
|
|
9
|
+
# Virtual environments
|
|
10
|
+
.venv
|
|
11
|
+
|
|
12
|
+
# Environment variables — never commit credentials or PHI values
|
|
13
|
+
.env
|
|
14
|
+
|
|
15
|
+
# SQLite databases — may contain PHI; exclude from version control
|
|
16
|
+
*.db
|
|
17
|
+
*.sqlite3
|
|
18
|
+
|
|
19
|
+
# PhiScan internal state — stores SHA-256 hashes, never raw PHI
|
|
20
|
+
.phi-scanner/
|
|
21
|
+
|
|
22
|
+
# PhiScan scan output
|
|
23
|
+
phi-report.json
|
|
24
|
+
|
|
25
|
+
# Test coverage artifacts
|
|
26
|
+
.coverage
|
|
27
|
+
coverage.xml
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# PhiScan default configuration
|
|
2
|
+
# Generated by: phi-scan config init
|
|
3
|
+
# Reference: phi-scan explain config
|
|
4
|
+
|
|
5
|
+
# Schema version for forward-compatibility. Increment when breaking changes
|
|
6
|
+
# are introduced to this file's structure so the config loader can migrate.
|
|
7
|
+
version: 1
|
|
8
|
+
|
|
9
|
+
scan:
|
|
10
|
+
# Minimum confidence score to report a finding (0.0–1.0).
|
|
11
|
+
# Set to 0.7 for this repo: context-dependent patterns without matching context
|
|
12
|
+
# keywords (e.g. HEALTH_PLAN_NUMBER matching any 8–20 char identifier) fire at
|
|
13
|
+
# 0.65 (_CONFIDENCE_CONTEXT_ABSENT) and generate false positives in source code.
|
|
14
|
+
# Context-confirmed findings (0.88) and structural patterns (SSN, MBI, etc.) are
|
|
15
|
+
# well above 0.7 and are unaffected. Clinical repos should set this to 0.5–0.6.
|
|
16
|
+
confidence_threshold: 0.7
|
|
17
|
+
|
|
18
|
+
# Minimum severity level to include in output and exit-code evaluation.
|
|
19
|
+
# Maps to the SeverityLevel enum (not RiskLevel, which is the per-scan aggregate).
|
|
20
|
+
# Accepted values (lowercase): low, medium, high
|
|
21
|
+
# The config loader maps lowercase → enum: low → SeverityLevel.LOW, etc.
|
|
22
|
+
# ConfigurationError is raised on any value not in this set.
|
|
23
|
+
severity_threshold: low
|
|
24
|
+
|
|
25
|
+
# Maximum file size to scan. Files exceeding this limit are skipped and logged.
|
|
26
|
+
max_file_size_mb: 10
|
|
27
|
+
|
|
28
|
+
# SECURITY: Symlink following is disabled and must remain false.
|
|
29
|
+
# Setting this to true is a security violation — it allows traversal to escape
|
|
30
|
+
# the repository boundary and cause infinite loops in CI/CD environments.
|
|
31
|
+
# The config loader reads this field: if set to true, it raises ConfigurationError
|
|
32
|
+
# and the scan does not start. This field is not silently ignored.
|
|
33
|
+
follow_symlinks: false
|
|
34
|
+
|
|
35
|
+
# Optional allowlist of file extensions to scan.
|
|
36
|
+
# When set to null (default), all non-binary text files are scanned.
|
|
37
|
+
# Example: [".py", ".js", ".ts", ".yaml", ".yml"] (.yaml and .yml are equivalent)
|
|
38
|
+
include_extensions: null
|
|
39
|
+
|
|
40
|
+
# Paths and patterns to exclude from scanning (gitignore-style via pathspec).
|
|
41
|
+
# Evaluated at every directory depth — node_modules/ matches at any level.
|
|
42
|
+
exclude_paths:
|
|
43
|
+
- .git/
|
|
44
|
+
- .venv/
|
|
45
|
+
- node_modules/
|
|
46
|
+
- dist/
|
|
47
|
+
- build/
|
|
48
|
+
- "*.egg-info/"
|
|
49
|
+
- __pycache__/
|
|
50
|
+
- .mypy_cache/
|
|
51
|
+
- .ruff_cache/
|
|
52
|
+
- .pytest_cache/
|
|
53
|
+
- htmlcov/
|
|
54
|
+
- "*.pyc"
|
|
55
|
+
- "*.pyo"
|
|
56
|
+
|
|
57
|
+
output:
|
|
58
|
+
# Default output format when --output flag is not specified.
|
|
59
|
+
# Accepted values: table, json, sarif, csv, pdf, html, junit, codequality, gitlab-sast
|
|
60
|
+
# All values map directly by name to OutputFormat (e.g., table → OutputFormat.TABLE),
|
|
61
|
+
# except gitlab-sast which must be mapped explicitly to OutputFormat.GITLAB_SAST —
|
|
62
|
+
# not via a generic replace/upper transform.
|
|
63
|
+
# ConfigurationError is raised on any value not in this set.
|
|
64
|
+
# Run: phi-scan explain config for authoritative descriptions of each format.
|
|
65
|
+
format: table
|
|
66
|
+
|
|
67
|
+
# Suppress all Rich terminal output.
|
|
68
|
+
# Spec: findings must still be written to the audit log when quiet: true.
|
|
69
|
+
# Exit code is NOT affected — violations must still produce a non-zero exit code
|
|
70
|
+
# when quiet: true, ensuring CI gates continue to work correctly.
|
|
71
|
+
quiet: false
|
|
72
|
+
|
|
73
|
+
audit:
|
|
74
|
+
# Path to the SQLite audit database.
|
|
75
|
+
# Audit logs are immutable — never DELETE or UPDATE rows (HIPAA 45 CFR §164.530(j)).
|
|
76
|
+
# Note: ~ is expanded via Path(database_path).expanduser() at runtime — not by the
|
|
77
|
+
# YAML parser. Ensure the runtime handles this before using the value as a file path.
|
|
78
|
+
database_path: "~/.phi-scanner/audit.db"
|
|
79
|
+
|
|
80
|
+
# HIPAA 45 CFR §164.530(j) requires audit records to be retained for a minimum of 6 years.
|
|
81
|
+
# Any 6-year span contains at most 2 leap years: 4×365 + 2×366 = 2192.
|
|
82
|
+
# This is the mathematical maximum — 3+ leap years in 6 years is impossible.
|
|
83
|
+
# AUDIT_RETENTION_DAYS in constants.py must equal this value.
|
|
84
|
+
retention_days: 2192
|
|
85
|
+
|
|
86
|
+
ai:
|
|
87
|
+
# Claude API review is disabled by default — all scanning is local.
|
|
88
|
+
# Enabling this sends only redacted code structure (never raw PHI) to the API.
|
|
89
|
+
# See CLAUDE.md AI Integration Rules before enabling.
|
|
90
|
+
enable_claude_review: false
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# PhiScan pre-commit hook definition.
|
|
2
|
+
#
|
|
3
|
+
# This file registers phi-scan with the pre-commit framework (https://pre-commit.com).
|
|
4
|
+
# Add the hook to your project by creating or editing .pre-commit-config.yaml:
|
|
5
|
+
#
|
|
6
|
+
# repos:
|
|
7
|
+
# - repo: https://github.com/joeyessak/phi-scan
|
|
8
|
+
# rev: v0.3.0
|
|
9
|
+
# hooks:
|
|
10
|
+
# - id: phi-scan
|
|
11
|
+
#
|
|
12
|
+
# See docs/ci-cd-integration.md for full configuration options.
|
|
13
|
+
- id: phi-scan
|
|
14
|
+
name: "PhiScan \u2014 PHI/PII Detection"
|
|
15
|
+
description: >
|
|
16
|
+
Scan staged changes for HIPAA-covered Protected Health Information (PHI) and PII.
|
|
17
|
+
Blocks the commit when new findings are detected. All scanning runs locally —
|
|
18
|
+
no data is transmitted to any external service.
|
|
19
|
+
entry: phi-scan scan --diff HEAD
|
|
20
|
+
language: python
|
|
21
|
+
types: [text]
|
|
22
|
+
pass_filenames: false
|
|
23
|
+
stages: [pre-commit, pre-push]
|
|
24
|
+
minimum_pre_commit_version: "3.2.0"
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to PhiScan will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.3.0] - 2026-03-30
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- **Detection engine — 4 layers:**
|
|
15
|
+
- Layer 1: Regex pattern registry covering all 18 HIPAA Safe Harbor identifiers plus MBI,
|
|
16
|
+
HICN, DEA number, genetic identifiers (rs-IDs, VCF, Ensembl), SUD-related field names,
|
|
17
|
+
age >90, quasi-identifier combinations (ZIP + DOB + sex), and NPI type distinction
|
|
18
|
+
- Layer 2: NLP named entity recognition via Presidio + spaCy (`phi-scan[nlp]` optional extra)
|
|
19
|
+
- Layer 3: FHIR R4 structured field scanning and HL7 v2 segment scanning (PID, NK1, IN1)
|
|
20
|
+
via `phi-scan[fhir]` and `phi-scan[hl7]` optional extras
|
|
21
|
+
- Layer 4: AI-assisted confidence scoring via Claude API (optional, disabled by default,
|
|
22
|
+
PHI is always redacted before any API call)
|
|
23
|
+
- **CLI commands:** `scan`, `scan --diff`, `scan --file`, `watch`, `report`, `history`,
|
|
24
|
+
`init`, `setup`, `fix`, `explain`, `baseline`, `install-hook`, `uninstall-hook`,
|
|
25
|
+
`config init`, `dashboard`, `plugins list`
|
|
26
|
+
- **Output formats:** `table`, `json`, `csv`, `sarif`, `junit`, `codequality`, `gitlab-sast`
|
|
27
|
+
- **Baseline management:** `phi-scan baseline create|show|clear|update|diff` — adopt
|
|
28
|
+
PhiScan incrementally in existing codebases; only new findings block CI
|
|
29
|
+
- **Auto-fix engine:** `phi-scan fix --dry-run|--apply|--patch` — replace PHI with
|
|
30
|
+
deterministic synthetic data (requires `faker`)
|
|
31
|
+
- **Inline suppression:** `# phi-scan:ignore`, `# phi-scan:ignore[SSN,MRN]`,
|
|
32
|
+
`# phi-scan:ignore-next-line`, `# phi-scan:ignore-file` with language-aware prefixes
|
|
33
|
+
- **SQLite audit log:** immutable HIPAA-compliant scan history; SHA-256 hashes only,
|
|
34
|
+
raw PHI values never stored
|
|
35
|
+
- **Structured output cache:** content-hash-based scan cache to skip unchanged files
|
|
36
|
+
- **Pre-commit framework integration:** `.pre-commit-hooks.yaml` registers phi-scan as a
|
|
37
|
+
pre-commit hook; `phi-scan install-hook` installs a native git pre-commit hook
|
|
38
|
+
- **Rich terminal UI:** progress bar, findings table, file tree, code context panels,
|
|
39
|
+
ASCII banner; suppressed automatically for piped/serialised output formats
|
|
40
|
+
- **Graceful degradation:** NLP, FHIR, HL7, and AI layers each degrade to a logged
|
|
41
|
+
warning when their optional dependency group is not installed
|
|
42
|
+
- **PHI hygiene at detection layer:** matched PHI values are redacted to `[REDACTED]` in
|
|
43
|
+
`code_context` before `ScanFinding` is constructed — raw values never stored or displayed
|
|
44
|
+
- **Rich markup safety:** user-derived strings (file paths, source lines) are escaped
|
|
45
|
+
before Rich rendering to prevent `MarkupError` crashes on source containing `[` characters
|
|
46
|
+
|
|
47
|
+
### Changed
|
|
48
|
+
|
|
49
|
+
- Version bumped from `0.1.0` to `0.3.0` (Phases 1–3C complete per version table in PLAN.md)
|
|
50
|
+
- Dependency bounds narrowed to compatible-release pins (`~=`) for predictable installs
|
|
51
|
+
|
|
52
|
+
## [0.1.0] - 2026-03-01
|
|
53
|
+
|
|
54
|
+
### Added
|
|
55
|
+
|
|
56
|
+
- Project scaffolding: `pyproject.toml`, CI workflows (lint, typecheck, test, release,
|
|
57
|
+
Claude PR review), MIT license, `README.md`, `SECURITY.md`, `CODE_OF_CONDUCT.md`
|
|
58
|
+
- Typer CLI skeleton with Rich terminal UI and pyfiglet ASCII banner
|
|
59
|
+
- Structured logging (`--log-level`, `--log-file`)
|
|
60
|
+
- YAML configuration loading and validation (`.phi-scanner.yml`)
|
|
61
|
+
- Git diff file extraction (`--diff` mode)
|
|
62
|
+
- `.phi-scanignore` exclusion pattern support (gitignore-style via pathspec)
|
|
63
|
+
|
|
64
|
+
[Unreleased]: https://github.com/joeyessak/phi-scan/compare/v0.3.0...HEAD
|
|
65
|
+
[0.3.0]: https://github.com/joeyessak/phi-scan/compare/v0.1.0...v0.3.0
|
|
66
|
+
[0.1.0]: https://github.com/joeyessak/phi-scan/releases/tag/v0.1.0
|
phi_scan-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Joey Essak
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
phi_scan-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phi-scan
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: PHI/PII Scanner for CI/CD pipelines. HIPAA & FHIR compliant. Local execution only.
|
|
5
|
+
Project-URL: Homepage, https://github.com/joeyessak/phi-scan
|
|
6
|
+
Project-URL: Repository, https://github.com/joeyessak/phi-scan
|
|
7
|
+
Project-URL: Issues, https://github.com/joeyessak/phi-scan/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/joeyessak/phi-scan/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: Joey Essak <joey.essak@gmail.com>
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2026 Joey Essak
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Keywords: cicd,compliance,fhir,hipaa,phi,pii,scanner,security
|
|
33
|
+
Classifier: Development Status :: 3 - Alpha
|
|
34
|
+
Classifier: Environment :: Console
|
|
35
|
+
Classifier: Intended Audience :: Developers
|
|
36
|
+
Classifier: Intended Audience :: Healthcare Industry
|
|
37
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
38
|
+
Classifier: Operating System :: OS Independent
|
|
39
|
+
Classifier: Programming Language :: Python :: 3
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
41
|
+
Classifier: Topic :: Security
|
|
42
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
43
|
+
Classifier: Typing :: Typed
|
|
44
|
+
Requires-Python: >=3.12
|
|
45
|
+
Requires-Dist: httpx~=0.27
|
|
46
|
+
Requires-Dist: pathspec~=0.12
|
|
47
|
+
Requires-Dist: pyfiglet~=1.0
|
|
48
|
+
Requires-Dist: python-dotenv~=1.2
|
|
49
|
+
Requires-Dist: pyyaml~=6.0
|
|
50
|
+
Requires-Dist: rich~=13.7
|
|
51
|
+
Requires-Dist: typer[all]~=0.24.1
|
|
52
|
+
Requires-Dist: watchdog~=4.0
|
|
53
|
+
Provides-Extra: fhir
|
|
54
|
+
Requires-Dist: fhir-resources>=7.0; extra == 'fhir'
|
|
55
|
+
Provides-Extra: full
|
|
56
|
+
Requires-Dist: fhir-resources>=7.0; extra == 'full'
|
|
57
|
+
Requires-Dist: fpdf2>=2.7; extra == 'full'
|
|
58
|
+
Requires-Dist: hl7>=0.4; extra == 'full'
|
|
59
|
+
Requires-Dist: jinja2>=3.1; extra == 'full'
|
|
60
|
+
Requires-Dist: matplotlib>=3.8; extra == 'full'
|
|
61
|
+
Requires-Dist: presidio-analyzer>=2.0; extra == 'full'
|
|
62
|
+
Requires-Dist: presidio-anonymizer>=2.0; extra == 'full'
|
|
63
|
+
Requires-Dist: spacy>=3.7; extra == 'full'
|
|
64
|
+
Provides-Extra: hl7
|
|
65
|
+
Requires-Dist: hl7>=0.4; extra == 'hl7'
|
|
66
|
+
Provides-Extra: nlp
|
|
67
|
+
Requires-Dist: presidio-analyzer>=2.0; extra == 'nlp'
|
|
68
|
+
Requires-Dist: presidio-anonymizer>=2.0; extra == 'nlp'
|
|
69
|
+
Requires-Dist: spacy>=3.7; extra == 'nlp'
|
|
70
|
+
Provides-Extra: reports
|
|
71
|
+
Requires-Dist: fpdf2>=2.7; extra == 'reports'
|
|
72
|
+
Requires-Dist: jinja2>=3.1; extra == 'reports'
|
|
73
|
+
Requires-Dist: matplotlib>=3.8; extra == 'reports'
|
|
74
|
+
Description-Content-Type: text/markdown
|
|
75
|
+
|
|
76
|
+
# PhiScan
|
|
77
|
+
|
|
78
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
79
|
+
[![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue.svg)](https://www.python.org/downloads/)
|
|
80
|
+
[![CI](https://github.com/joeyessak/phi-scan/actions/workflows/ci.yml/badge.svg)](https://github.com/joeyessak/phi-scan/actions/workflows/ci.yml)
|
|
81
|
+
|
|
82
|
+
HIPAA & FHIR compliant PHI/PII scanner for CI/CD pipelines. Local execution only — no PHI ever leaves your infrastructure.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## What it does
|
|
87
|
+
|
|
88
|
+
PhiScan scans source code for Protected Health Information (PHI) and Personally Identifiable Information (PII) before it reaches your main branch. It integrates into CI/CD pipelines to block pull requests that contain exposed PHI.
|
|
89
|
+
|
|
90
|
+
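All scanning runs locally inside your pipeline runner. Nothing is sent to an external API unless you explicitly enable the optional Claude review (disabled by default), which sends only redacted code structure — never raw PHI.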
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Install
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pipx install phi-scan
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Or with uv:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
uv tool install phi-scan
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## Usage
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# Scan a directory
|
|
112
|
+
phi-scan scan ./src
|
|
113
|
+
|
|
114
|
+
# Scan only files changed in the last commit
|
|
115
|
+
phi-scan scan --diff HEAD~1
|
|
116
|
+
|
|
117
|
+
# Scan a single file
|
|
118
|
+
phi-scan scan --file path/to/handler.py
|
|
119
|
+
|
|
120
|
+
# Output as JSON
|
|
121
|
+
phi-scan scan ./src --output json
|
|
122
|
+
|
|
123
|
+
# Show help
|
|
124
|
+
phi-scan --help
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Contributing
|
|
130
|
+
|
|
131
|
+
### Branch protection rules
|
|
132
|
+
|
|
133
|
+
The `main` branch is protected. All changes arrive via pull request. No one pushes directly to `main`.
|
|
134
|
+
|
|
135
|
+
| Rule | Setting |
|
|
136
|
+
| ---- | ------- |
|
|
137
|
+
| Require CI to pass before merge | All jobs in `ci.yml` must pass (lint, typecheck, tests on all 3 platforms) |
|
|
138
|
+
| Require at least one review | Enforced when collaborators join the project |
|
|
139
|
+
| No direct pushes to `main` | Branch protection enforced via GitHub settings |
|
|
140
|
+
|
|
141
|
+
To configure these rules: **Settings → Branches → Add branch protection rule → `main`**, then enable:
|
|
142
|
+
- "Require a pull request before merging"
|
|
143
|
+
- "Require status checks to pass before merging" → select the `CI` workflow jobs
|
|
144
|
+
- "Do not allow bypassing the above settings"
|
|
145
|
+
|
|
146
|
+
### CI workflows
|
|
147
|
+
|
|
148
|
+
| Workflow | Trigger | What it does |
|
|
149
|
+
| -------- | ------- | ------------ |
|
|
150
|
+
| `ci.yml` | Every push and PR targeting `main` | Lint (ruff), typecheck (mypy), tests (pytest + coverage) on Python 3.12 × ubuntu/macos/windows |
|
|
151
|
+
| `release.yml` | Push of a `v*` tag | Runs tests, builds sdist + wheel, publishes to PyPI, creates GitHub Release |
|
|
152
|
+
| `claude-review.yml` | Every PR open/update | Posts an automated Claude code review comment |
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## License
|
|
157
|
+
|
|
158
|
+
[MIT](LICENSE)
|
phi_scan-0.3.0/README.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# PhiScan
|
|
2
|
+
|
|
3
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
4
|
+
[![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue.svg)](https://www.python.org/downloads/)
|
|
5
|
+
[![CI](https://github.com/joeyessak/phi-scan/actions/workflows/ci.yml/badge.svg)](https://github.com/joeyessak/phi-scan/actions/workflows/ci.yml)
|
|
6
|
+
|
|
7
|
+
HIPAA & FHIR compliant PHI/PII scanner for CI/CD pipelines. Local execution only — no PHI ever leaves your infrastructure.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## What it does
|
|
12
|
+
|
|
13
|
+
PhiScan scans source code for Protected Health Information (PHI) and Personally Identifiable Information (PII) before it reaches your main branch. It integrates into CI/CD pipelines to block pull requests that contain exposed PHI.
|
|
14
|
+
|
|
15
|
+
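All scanning runs locally inside your pipeline runner. Nothing is sent to an external API unless you explicitly enable the optional Claude review (disabled by default), which sends only redacted code structure — never raw PHI.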
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pipx install phi-scan
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Or with uv:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
uv tool install phi-scan
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# Scan a directory
|
|
37
|
+
phi-scan scan ./src
|
|
38
|
+
|
|
39
|
+
# Scan only files changed in the last commit
|
|
40
|
+
phi-scan scan --diff HEAD~1
|
|
41
|
+
|
|
42
|
+
# Scan a single file
|
|
43
|
+
phi-scan scan --file path/to/handler.py
|
|
44
|
+
|
|
45
|
+
# Output as JSON
|
|
46
|
+
phi-scan scan ./src --output json
|
|
47
|
+
|
|
48
|
+
# Show help
|
|
49
|
+
phi-scan --help
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Contributing
|
|
55
|
+
|
|
56
|
+
### Branch protection rules
|
|
57
|
+
|
|
58
|
+
The `main` branch is protected. All changes arrive via pull request. No one pushes directly to `main`.
|
|
59
|
+
|
|
60
|
+
| Rule | Setting |
|
|
61
|
+
| ---- | ------- |
|
|
62
|
+
| Require CI to pass before merge | All jobs in `ci.yml` must pass (lint, typecheck, tests on all 3 platforms) |
|
|
63
|
+
| Require at least one review | Enforced when collaborators join the project |
|
|
64
|
+
| No direct pushes to `main` | Branch protection enforced via GitHub settings |
|
|
65
|
+
|
|
66
|
+
To configure these rules: **Settings → Branches → Add branch protection rule → `main`**, then enable:
|
|
67
|
+
- "Require a pull request before merging"
|
|
68
|
+
- "Require status checks to pass before merging" → select the `CI` workflow jobs
|
|
69
|
+
- "Do not allow bypassing the above settings"
|
|
70
|
+
|
|
71
|
+
### CI workflows
|
|
72
|
+
|
|
73
|
+
| Workflow | Trigger | What it does |
|
|
74
|
+
| -------- | ------- | ------------ |
|
|
75
|
+
| `ci.yml` | Every push and PR targeting `main` | Lint (ruff), typecheck (mypy), tests (pytest + coverage) on Python 3.12 × ubuntu/macos/windows |
|
|
76
|
+
| `release.yml` | Push of a `v*` tag | Runs tests, builds sdist + wheel, publishes to PyPI, creates GitHub Release |
|
|
77
|
+
| `claude-review.yml` | Every PR open/update | Posts an automated Claude code review comment |
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
[MIT](LICENSE)
|