cloakpii 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloakpii-1.2.0/LICENSE +21 -0
- cloakpii-1.2.0/PKG-INFO +422 -0
- cloakpii-1.2.0/README.md +389 -0
- cloakpii-1.2.0/pyproject.toml +55 -0
- cloakpii-1.2.0/setup.cfg +4 -0
- cloakpii-1.2.0/src/cloakpii/__init__.py +3 -0
- cloakpii-1.2.0/src/cloakpii/__main__.py +6 -0
- cloakpii-1.2.0/src/cloakpii/audit.py +151 -0
- cloakpii-1.2.0/src/cloakpii/cli.py +770 -0
- cloakpii-1.2.0/src/cloakpii/compliance.py +397 -0
- cloakpii-1.2.0/src/cloakpii/config.py +173 -0
- cloakpii-1.2.0/src/cloakpii/crypto.py +124 -0
- cloakpii-1.2.0/src/cloakpii/exceptions.py +47 -0
- cloakpii-1.2.0/src/cloakpii/integrity.py +119 -0
- cloakpii-1.2.0/src/cloakpii/migrate.py +782 -0
- cloakpii-1.2.0/src/cloakpii/pii.py +758 -0
- cloakpii-1.2.0/src/cloakpii/pii_ml.py +115 -0
- cloakpii-1.2.0/src/cloakpii/state.py +90 -0
- cloakpii-1.2.0/src/cloakpii.egg-info/PKG-INFO +422 -0
- cloakpii-1.2.0/src/cloakpii.egg-info/SOURCES.txt +31 -0
- cloakpii-1.2.0/src/cloakpii.egg-info/dependency_links.txt +1 -0
- cloakpii-1.2.0/src/cloakpii.egg-info/entry_points.txt +2 -0
- cloakpii-1.2.0/src/cloakpii.egg-info/requires.txt +5 -0
- cloakpii-1.2.0/src/cloakpii.egg-info/top_level.txt +1 -0
- cloakpii-1.2.0/tests/test_comprehensive.py +636 -0
- cloakpii-1.2.0/tests/test_crypto.py +95 -0
- cloakpii-1.2.0/tests/test_incremental.py +153 -0
- cloakpii-1.2.0/tests/test_migrate.py +160 -0
- cloakpii-1.2.0/tests/test_new_features.py +313 -0
- cloakpii-1.2.0/tests/test_optimizations.py +104 -0
- cloakpii-1.2.0/tests/test_pIPL_pdpa.py +227 -0
- cloakpii-1.2.0/tests/test_pii.py +161 -0
- cloakpii-1.2.0/tests/test_route_a.py +79 -0
cloakpii-1.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 CloakPII Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
cloakpii-1.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cloakpii
|
|
3
|
+
Version: 1.2.0
|
|
4
|
+
Summary: PII desensitization + AES-256-GCM encryption + compliance reporting for cross-border data transfers (PIPL / PDPA / GDPR)
|
|
5
|
+
Author: Hellotravisss
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Hellotravisss/cloakpii
|
|
8
|
+
Project-URL: Repository, https://github.com/Hellotravisss/cloakpii
|
|
9
|
+
Project-URL: Changelog, https://github.com/Hellotravisss/cloakpii/blob/main/CHANGELOG.md
|
|
10
|
+
Project-URL: Issues, https://github.com/Hellotravisss/cloakpii/issues
|
|
11
|
+
Keywords: pii,data-masking,encryption,compliance,pipl,pdpa,gdpr,data-privacy,cross-border
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Information Technology
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Security :: Cryptography
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: cryptography>=42.0
|
|
28
|
+
Requires-Dist: pyyaml>=6.0
|
|
29
|
+
Requires-Dist: openpyxl>=3.1
|
|
30
|
+
Requires-Dist: pyarrow>=14.0
|
|
31
|
+
Requires-Dist: tqdm>=4.60
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# CloakPII
|
|
35
|
+
|
|
36
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
37
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
38
|
+
[CI status](https://github.com/Hellotravisss/cloakpii/actions/workflows/ci.yml)
|
|
39
|
+
|
|
40
|
+
Secure, compliant data migration toolkit for offshore transfers. Automatically detects and desensitizes PII (Personally Identifiable Information), encrypts data with AES-256-GCM, and generates regulatory documentation for cross-border data protection regimes.
|
|
41
|
+
|
|
42
|
+
## What this is — and what it isn't
|
|
43
|
+
|
|
44
|
+
**Use it to** turn a directory of files containing PII into a **desensitized, encrypted** copy that is safe to move across borders (the design focus is **China ⇄ Singapore**, i.e. PIPL + PDPA), together with the paperwork those regimes expect.
|
|
45
|
+
|
|
46
|
+
Two things to understand before you rely on it:
|
|
47
|
+
|
|
48
|
+
- **Desensitization is irreversible.** Masked values (`alice@x.com` → `a***@x******.com`) cannot be recovered — even after you decrypt. `decrypt-all` gives you back the *masked* data, not the original. If you need to move **usable** raw data, this is not the right tool; use transport encryption without the masking step.
|
|
49
|
+
- **Compliance output is documentation, not legal sign-off.** The `profiles`, `assessment`, and `--compliance-report` features generate checklists and declaration templates to *help* you prepare a filing. They do not constitute legal advice or a guarantee of compliance — have counsel review actual cross-border filings.
|
|
50
|
+
|
|
51
|
+
## Features
|
|
52
|
+
|
|
53
|
+
- **8 file formats**: CSV, JSON, Excel, Parquet, XML, TSV, SQLite, plain text
|
|
54
|
+
- **11 PII types**: email, phone, SSN, credit card, IP, Chinese ID, passport, bank account, IBAN, MAC address, date of birth
|
|
55
|
+
- **5 compliance profiles**: GDPR (EU), PDPA (Singapore), CCPA (California), LGPD (Brazil), PIPL (China)
|
|
56
|
+
- **AES-256-GCM encryption** with PBKDF2 key derivation (480k iterations)
|
|
57
|
+
- **Parallel processing** with configurable worker threads
|
|
58
|
+
- **Progress bar** for real-time feedback
|
|
59
|
+
- **Integrity verification** via SHA-256 manifests
|
|
60
|
+
- **Audit trail** logging (JSON Lines)
|
|
61
|
+
- **YAML configuration** files with CLI overrides
|
|
62
|
+
- **Compression** support (gzip)
|
|
63
|
+
- **Resume** interrupted migrations
|
|
64
|
+
- **Docker** support
|
|
65
|
+
|
|
66
|
+
## Quick Start
|
|
67
|
+
|
|
68
|
+
### Installation
|
|
69
|
+
|
|
70
|
+
Install from source:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
git clone https://github.com/Hellotravisss/cloakpii.git
|
|
74
|
+
cd cloakpii
|
|
75
|
+
pip install -e .
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Basic Usage
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Migrate a directory (desensitize + encrypt)
|
|
82
|
+
cloakpii migrate --source data/ --output output/ --password mypassword
|
|
83
|
+
|
|
84
|
+
# Preview what would happen (dry run)
|
|
85
|
+
cloakpii migrate --source data/ --dry-run
|
|
86
|
+
|
|
87
|
+
# Encrypt a single file
|
|
88
|
+
cloakpii encrypt input.csv output.csv.enc --password mypassword
|
|
89
|
+
|
|
90
|
+
# Decrypt a file
|
|
91
|
+
cloakpii decrypt output.csv.enc decrypted.csv --password mypassword
|
|
92
|
+
|
|
93
|
+
# Restore an entire migration output tree (desensitized plaintext)
|
|
94
|
+
cloakpii decrypt-all --input output/encrypted --output restored/ --password mypassword
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Using Environment Variables
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
export ODM_PASSWORD=mypassword
|
|
101
|
+
cloakpii migrate --source data/ --output output/
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## CLI Reference
|
|
105
|
+
|
|
106
|
+
### Commands
|
|
107
|
+
|
|
108
|
+
| Command | Description |
|
|
109
|
+
|------------|----------------------------------------------|
|
|
110
|
+
| `migrate` | Run full migration pipeline |
|
|
111
|
+
| `encrypt` | Encrypt a single file |
|
|
112
|
+
| `decrypt` | Decrypt a single file |
|
|
113
|
+
| `decrypt-all` | Decrypt a whole migration output tree |
|
|
114
|
+
| `init` | Initialize project configuration |
|
|
115
|
+
| `verify` | Verify file integrity against a manifest |
|
|
116
|
+
| `status` | Show status of a previous migration |
|
|
117
|
+
| `profiles` | List available compliance profiles |
|
|
118
|
+
|
|
119
|
+
### migrate
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
cloakpii migrate [OPTIONS]
|
|
123
|
+
|
|
124
|
+
Options:
|
|
125
|
+
--source DIR Source directory (default: examples)
|
|
126
|
+
--output DIR Output directory (default: output)
|
|
127
|
+
--target NAME Target jurisdiction (default: singapore)
|
|
128
|
+
--password PW Encryption password (or use ODM_PASSWORD env var)
|
|
129
|
+
--config FILE Path to YAML config file
|
|
130
|
+
--dry-run Preview without modifying files
|
|
131
|
+
--workers N Number of parallel workers (default: 1)
|
|
132
|
+
--batch-size N Max files to process (0 = all)
|
|
133
|
+
--no-progress Disable progress bar
|
|
134
|
+
--compliance-profile P Validate against profile (gdpr/pdpa/ccpa/lgpd/pipl)
|
|
135
|
+
--compress Compress encrypted output with gzip
|
|
136
|
+
--resume Skip already-processed files
|
|
137
|
+
--no-manifest Skip SHA-256 manifest generation
|
|
138
|
+
--audit FILE Path for audit log (JSON Lines)
|
|
139
|
+
--skip-patterns PAT... Glob patterns for files to skip
|
|
140
|
+
--verbose Enable debug logging
|
|
141
|
+
--log-file FILE Write logs to file
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Examples
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
# Parallel processing with 4 workers
|
|
148
|
+
cloakpii migrate --source data/ --output out/ --workers 4
|
|
149
|
+
|
|
150
|
+
# GDPR compliance check
|
|
151
|
+
cloakpii migrate --source data/ --compliance-profile gdpr
|
|
152
|
+
|
|
153
|
+
# Process only first 10 files
|
|
154
|
+
cloakpii migrate --source data/ --batch-size 10
|
|
155
|
+
|
|
156
|
+
# Resume interrupted migration
|
|
157
|
+
cloakpii migrate --source data/ --output out/ --resume
|
|
158
|
+
|
|
159
|
+
# With audit log and compression
|
|
160
|
+
cloakpii migrate --source data/ --audit out/audit.jsonl --compress
|
|
161
|
+
|
|
162
|
+
# Skip test files
|
|
163
|
+
cloakpii migrate --source data/ --skip-patterns "test_*" "*.tmp"
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Configuration File
|
|
167
|
+
|
|
168
|
+
Create a `migration.yaml` for reusable settings:
|
|
169
|
+
|
|
170
|
+
```yaml
|
|
171
|
+
source: /path/to/data
|
|
172
|
+
output: /path/to/output
|
|
173
|
+
target: singapore
|
|
174
|
+
compliance_profile: pdpa
|
|
175
|
+
workers: 4
|
|
176
|
+
batch_size: 0
|
|
177
|
+
show_progress: true
|
|
178
|
+
encrypt_method: aes-256-gcm
|
|
179
|
+
audit_log: true
|
|
180
|
+
generate_manifest: true
|
|
181
|
+
compress_output: false
|
|
182
|
+
skip_patterns:
|
|
183
|
+
- "*.tmp"
|
|
184
|
+
- "test_*"
|
|
185
|
+
custom_pii_patterns: []
|
|
186
|
+
field_mappings: {}
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Use it:
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
cloakpii migrate --config migration.yaml
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
CLI arguments override config file values.
|
|
196
|
+
|
|
197
|
+
## Supported File Formats
|
|
198
|
+
|
|
199
|
+
| Format | Extension | Description |
|
|
200
|
+
|----------|------------------------|--------------------------------------|
|
|
201
|
+
| CSV | `.csv` | Comma-separated values |
|
|
202
|
+
| JSON | `.json` | JSON files (nested structures) |
|
|
203
|
+
| Excel | `.xlsx`, `.xls` | Excel workbooks (all sheets) |
|
|
204
|
+
| Parquet | `.parquet` | Apache Parquet columnar format |
|
|
205
|
+
| XML | `.xml` | XML documents |
|
|
206
|
+
| TSV | `.tsv` | Tab-separated values |
|
|
207
|
+
| SQLite | `.db`, `.sqlite` | SQLite databases (all tables) |
|
|
208
|
+
| Text | `.txt`, `.log`, `.md` | Plain text files |
|
|
209
|
+
|
|
210
|
+
## Supported PII Types
|
|
211
|
+
|
|
212
|
+
| PII Type | Example | Masked Output |
|
|
213
|
+
|-----------------|----------------------------|----------------------------|
|
|
214
|
+
| Email | `user@example.com` | `u***@e******.com` |
|
|
215
|
+
| Phone | `555-123-4567` | `555-***-****` |
|
|
216
|
+
| SSN | `123-45-6789` | `***-**-6789` |
|
|
217
|
+
| Credit Card | `4111111111111111` | `4111****1111` |
|
|
218
|
+
| IP Address | `192.168.1.100` | `192.168.*.*` |
|
|
219
|
+
| Chinese ID | `110101199001011234` | `1101***********234` |
|
|
220
|
+
| Passport | `AB1234567` | `AB***4567` |
|
|
221
|
+
| Bank Account | `1234567890123456` | `1234********3456` |
|
|
222
|
+
| IBAN | `GB29NWBK60161331926819` | `GB29****6819` |
|
|
223
|
+
| MAC Address | `00:1B:44:11:3A:B7` | `00:1B:**:**:**:B7` |
|
|
224
|
+
| Date of Birth | `1990-01-15` | `****-**-15` |
|
|
225
|
+
|
|
226
|
+
Values in fields whose names contain keywords like `name`, `email`, `phone`, `ssn`, `address`, `passport`, or `bank_account` are automatically masked, even if the content doesn't match a regex pattern.
|
|
227
|
+
|
|
228
|
+
## Compliance Profiles
|
|
229
|
+
## Route A Focus (v1.1.0+): China & Singapore Compliance
|
|
230
|
+
|
|
231
|
+
**CloakPII** is now optimized for **PIPL (China)** and **PDPA (Singapore)** — two of the strictest data protection regimes for cross-border transfers.
|
|
232
|
+
|
|
233
|
+
### Quick Start - PIPL (China)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
Running `cloakpii migrate --compliance-profile pipl --compliance-report` generates:
|
|
237
|
+
- Full PII desensitization + AES-256-GCM encryption
|
|
238
|
+
- Security assessment checklist
|
|
239
|
+
- Cross-border transfer legal path documentation
|
|
240
|
+
|
|
241
|
+
### Quick Start - PDPA (Singapore)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
The PDPA compliance report (`cloakpii migrate --compliance-profile pdpa --compliance-report`) includes DPO requirements and 30-day access request handling notes.
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
cloakpii profiles
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
| Profile | Jurisdiction | Key Requirements |
|
|
252
|
+
|---------|-------------|------------------|
|
|
253
|
+
| GDPR | EU | Explicit consent, 72h breach notification, right to erasure |
|
|
254
|
+
| PDPA | Singapore | DPO required, 30-day access requests |
|
|
255
|
+
| CCPA | California | Right to know/delete/opt-out |
|
|
256
|
+
| LGPD | Brazil | Legal basis required, ANPD reporting |
|
|
257
|
+
| PIPL | China | Data localization, cross-border assessment required |
|
|
258
|
+
|
|
259
|
+
## Docker
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
# Build
|
|
263
|
+
docker build -t cloakpii .
|
|
264
|
+
|
|
265
|
+
# Run
|
|
266
|
+
docker run --rm -v $(pwd)/data:/data -v $(pwd)/output:/output \
|
|
267
|
+
-e ODM_PASSWORD=mypassword \
|
|
268
|
+
cloakpii migrate --source /data --output /output
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
Or with docker-compose:
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
ODM_PASSWORD=mypassword docker-compose run migrator
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
## Architecture
|
|
278
|
+
|
|
279
|
+
```
|
|
280
|
+
cloakpii/
|
|
281
|
+
├── __init__.py # Version
|
|
282
|
+
├── cli.py # CLI entry point (argparse)
|
|
283
|
+
├── crypto.py # AES-256-GCM encryption
|
|
284
|
+
├── pii.py # PII detection & desensitization (8 formats)
|
|
285
|
+
├── migrate.py # Migration pipeline orchestration
|
|
286
|
+
├── compliance.py # Jurisdiction compliance profiles
|
|
287
|
+
├── integrity.py # SHA-256 manifest verification
|
|
288
|
+
├── config.py # YAML configuration support
|
|
289
|
+
└── audit.py # Audit trail logging
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
**Pipeline flow:**
|
|
293
|
+
```
|
|
294
|
+
Source files → Classify → Desensitize PII → Encrypt (AES-256-GCM) → Manifest → Output
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
## Development
|
|
298
|
+
|
|
299
|
+
```bash
|
|
300
|
+
# Clone and install
|
|
301
|
+
git clone https://github.com/Hellotravisss/cloakpii.git
|
|
302
|
+
cd cloakpii
|
|
303
|
+
pip install -e .
|
|
304
|
+
pip install pytest ruff
|
|
305
|
+
|
|
306
|
+
# Run tests
|
|
307
|
+
make test
|
|
308
|
+
|
|
309
|
+
# Lint
|
|
310
|
+
make lint
|
|
311
|
+
|
|
312
|
+
# Build
|
|
313
|
+
make build
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
## License
|
|
317
|
+
|
|
318
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
319
|
+
|
|
320
|
+
## Route A Quickstart (PIPL + PDPA) — v1.1.0
|
|
321
|
+
|
|
322
|
+
```bash
|
|
323
|
+
# 1. List enhanced compliance profiles
|
|
324
|
+
cloakpii profiles
|
|
325
|
+
|
|
326
|
+
# 2. Run migration with compliance report (PIPL)
|
|
327
|
+
ODM_PASSWORD=yourpass cloakpii migrate \
|
|
328
|
+
--source examples \
|
|
329
|
+
--output output/pipl \
|
|
330
|
+
--compliance-profile pipl \
|
|
331
|
+
--compliance-report
|
|
332
|
+
|
|
333
|
+
# 3. Same for PDPA (Singapore)
|
|
334
|
+
ODM_PASSWORD=yourpass cloakpii migrate \
|
|
335
|
+
--source examples \
|
|
336
|
+
--output output/pdpa \
|
|
337
|
+
--compliance-profile pdpa \
|
|
338
|
+
--compliance-report
|
|
339
|
+
|
|
340
|
+
# Reports will be generated:
|
|
341
|
+
# - compliance_report_pipl.json + .md
|
|
342
|
+
# - compliance_report_pdpa.json + .md
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
## New in v1.1.0 (Route A)
|
|
346
|
+
|
|
347
|
+
### New Commands
|
|
348
|
+
```bash
|
|
349
|
+
# Scan a directory for PII without migrating
|
|
350
|
+
cloakpii scan --source data/ --output scan_report.json
|
|
351
|
+
|
|
352
|
+
# Generate PIPL Security Assessment template
|
|
353
|
+
cloakpii assessment --output security_assessment.json
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
### Enhanced migrate command
|
|
357
|
+
```bash
|
|
358
|
+
# Generate professional compliance report (JSON + Markdown)
|
|
359
|
+
cloakpii migrate \
|
|
360
|
+
--source examples \
|
|
361
|
+
--compliance-profile pipl \
|
|
362
|
+
--compliance-report
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
### Configuration
|
|
366
|
+
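You can now store the password in your `migration.yaml`. The file then contains a plaintext secret, so keep it out of version control, or use the `ODM_PASSWORD` environment variable instead: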
|
|
367
|
+
```yaml
|
|
368
|
+
password: "your-password-here"
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
## Incremental Migration & Resume
|
|
373
|
+
|
|
374
|
+
CloakPII supports **incremental/resume** migrations using a local SQLite state database.
|
|
375
|
+
|
|
376
|
+
### How it works
|
|
377
|
+
|
|
378
|
+
- When you run with `--resume`, the tool records each successfully processed file (path + SHA-256 hash) in `.migration_state.db` inside the output directory.
|
|
379
|
+
- On subsequent runs with `--resume`, files with the **same path and hash** are automatically skipped.
|
|
380
|
+
- If a file is modified after being processed, its hash changes and it will be re-processed.
|
|
381
|
+
|
|
382
|
+
### Usage
|
|
383
|
+
|
|
384
|
+
```bash
|
|
385
|
+
# First run (processes everything)
|
|
386
|
+
cloakpii migrate --source data/ --output out/ --resume
|
|
387
|
+
|
|
388
|
+
# Later runs (only processes new or changed files)
|
|
389
|
+
cloakpii migrate --source data/ --output out/ --resume
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
### State Database Location
|
|
393
|
+
|
|
394
|
+
The state file is stored at:
|
|
395
|
+
```
|
|
396
|
+
<output_directory>/.migration_state.db
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
You can safely delete this file to force a full re-processing.
|
|
400
|
+
|
|
401
|
+
### Corruption Recovery
|
|
402
|
+
|
|
403
|
+
If the state database becomes corrupted (e.g. by an interrupted write), the migrator will automatically delete it and start fresh on the next run.
|
|
404
|
+
|
|
405
|
+
### Advanced: Custom State Location
|
|
406
|
+
|
|
407
|
+
For advanced use cases, you can manage the state manually via the Python API:
|
|
408
|
+
|
|
409
|
+
```python
|
|
410
|
+
from cloakpii.state import MigrationState
|
|
411
|
+
from cloakpii.migrate import run_migration
|
|
412
|
+
from pathlib import Path
|
|
413
|
+
|
|
414
|
+
state = MigrationState(Path("custom_state.db"))
|
|
415
|
+
report = run_migration(
|
|
416
|
+
source_dir=Path("data"),
|
|
417
|
+
output_dir=Path("out"),
|
|
418
|
+
password="secret",
|
|
419
|
+
resume=True,
|
|
420
|
+
state=state
|
|
421
|
+
)
|
|
422
|
+
```
|