expunct 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. expunct-0.1.0/.github/workflows/ci.yml +21 -0
  2. expunct-0.1.0/.github/workflows/publish.yml +34 -0
  3. expunct-0.1.0/.gitignore +13 -0
  4. expunct-0.1.0/PKG-INFO +357 -0
  5. expunct-0.1.0/README.md +340 -0
  6. expunct-0.1.0/pyproject.toml +40 -0
  7. expunct-0.1.0/src/expunct/__init__.py +63 -0
  8. expunct-0.1.0/src/expunct/_async_client.py +101 -0
  9. expunct-0.1.0/src/expunct/_base_client.py +173 -0
  10. expunct-0.1.0/src/expunct/_client.py +102 -0
  11. expunct-0.1.0/src/expunct/_exceptions.py +57 -0
  12. expunct-0.1.0/src/expunct/_polling.py +58 -0
  13. expunct-0.1.0/src/expunct/_version.py +3 -0
  14. expunct-0.1.0/src/expunct/models/__init__.py +32 -0
  15. expunct-0.1.0/src/expunct/models/api_keys.py +37 -0
  16. expunct-0.1.0/src/expunct/models/audit.py +27 -0
  17. expunct-0.1.0/src/expunct/models/batch.py +26 -0
  18. expunct-0.1.0/src/expunct/models/jobs.py +40 -0
  19. expunct-0.1.0/src/expunct/models/policies.py +84 -0
  20. expunct-0.1.0/src/expunct/models/redaction.py +24 -0
  21. expunct-0.1.0/src/expunct/resources/__init__.py +23 -0
  22. expunct-0.1.0/src/expunct/resources/api_keys.py +44 -0
  23. expunct-0.1.0/src/expunct/resources/audit.py +46 -0
  24. expunct-0.1.0/src/expunct/resources/batch.py +28 -0
  25. expunct-0.1.0/src/expunct/resources/jobs.py +77 -0
  26. expunct-0.1.0/src/expunct/resources/policies.py +62 -0
  27. expunct-0.1.0/src/expunct/resources/redact.py +157 -0
  28. expunct-0.1.0/tests/__init__.py +0 -0
  29. expunct-0.1.0/tests/conftest.py +17 -0
  30. expunct-0.1.0/tests/test_client.py +151 -0
  31. expunct-0.1.0/tests/test_convenience.py +356 -0
  32. expunct-0.1.0/tests/test_errors.py +405 -0
  33. expunct-0.1.0/tests/test_jobs.py +313 -0
  34. expunct-0.1.0/tests/test_redact.py +297 -0
@@ -0,0 +1,21 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: ${{ matrix.python-version }}
20
+ - run: pip install -e ".[dev]"
21
+ - run: pytest tests/ -v
@@ -0,0 +1,34 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ publish:
11
+ runs-on: ubuntu-latest
12
+ permissions:
13
+ contents: read
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: '3.12'
20
+
21
+ - name: Install build tools
22
+ run: pip install build twine
23
+
24
+ - name: Build
25
+ run: python -m build
26
+
27
+ - name: Check distribution
28
+ run: twine check dist/*
29
+
30
+ - name: Publish to PyPI
31
+ env:
32
+ TWINE_USERNAME: __token__
33
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
34
+ run: twine upload dist/*
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .eggs/
7
+ *.egg
8
+ .mypy_cache/
9
+ .pytest_cache/
10
+ .venv/
11
+ venv/
12
+ *.so
13
+ .DS_Store
expunct-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,357 @@
1
+ Metadata-Version: 2.4
2
+ Name: expunct
3
+ Version: 0.1.0
4
+ Summary: Python SDK for the Expunct API
5
+ Project-URL: repository, https://github.com/expunct/python-sdk
6
+ Author: Expunct
7
+ License-Expression: MIT
8
+ Requires-Python: >=3.10
9
+ Requires-Dist: httpx>=0.27
10
+ Requires-Dist: pydantic>=2.0
11
+ Provides-Extra: dev
12
+ Requires-Dist: mypy>=1.11; extra == 'dev'
13
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
14
+ Requires-Dist: pytest-httpx>=0.35; extra == 'dev'
15
+ Requires-Dist: pytest>=8.0; extra == 'dev'
16
+ Description-Content-Type: text/markdown
17
+
18
+ # Expunct Python SDK
19
+
20
+ Privacy infrastructure for modern applications. Detect and redact PII, secrets, and sensitive data before it reaches AI, logs, or external APIs.
21
+
22
+ [![PyPI version](https://badge.fury.io/py/expunct.svg)](https://pypi.org/project/expunct/)
23
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
24
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install expunct
30
+ ```
31
+
32
+ Get your API key at [expunct.ai](https://expunct.ai) — free tier includes 1M tokens/month, no credit card required.
33
+
34
+ ## Quick Start
35
+
36
+ ```python
37
+ from expunct import Expunct
38
+
39
+ client = Expunct(api_key="your-api-key")
40
+ redacted = client.sanitize_text("Alice Johnson's email is alice@example.com and SSN is 219-09-9999.")
41
+ print(redacted)
42
+ # Output: PERSON_1's email is EMAIL_ADDRESS_1 and SSN is US_SSN_1.
43
+ ```
44
+
45
+ ## Usage
46
+
47
+ ### Text redaction (sync)
48
+
49
+ ```python
50
+ from expunct import Expunct
51
+
52
+ client = Expunct(api_key="your-api-key")
53
+
54
+ redacted = client.sanitize_text("Call Bob at 415-555-0100 or bob@example.com")
55
+ print(redacted)
56
+ # Call PERSON_1 at PHONE_NUMBER_1 or EMAIL_ADDRESS_1
57
+ ```
58
+
59
+ ### Text redaction (async)
60
+
61
+ ```python
62
+ import asyncio
63
+ from expunct import AsyncExpunct
64
+
65
+ async def main():
66
+ async with AsyncExpunct(api_key="your-api-key") as client:
67
+ redacted = await client.sanitize_text("Call Bob at 415-555-0100 or bob@example.com")
68
+ print(redacted)
69
+
70
+ asyncio.run(main())
71
+ ```
72
+
73
+ ### File redaction (PDF, DOCX, images, audio)
74
+
75
+ ```python
76
+ from expunct import Expunct
77
+
78
+ client = Expunct(api_key="your-api-key")
79
+
80
+ # Pass a file path — returns redacted bytes
81
+ redacted_bytes = client.sanitize_file("contract.pdf")
82
+
83
+ # Save directly to disk
84
+ client.sanitize_file("contract.pdf", dest="contract_redacted.pdf")
85
+
86
+ # Pass a file-like object
87
+ with open("invoice.docx", "rb") as f:
88
+ redacted_bytes = client.sanitize_file(f)
89
+ ```
90
+
91
+ ### URI redaction (cloud storage)
92
+
93
+ Submit a file hosted in cloud storage (S3, GCS, Azure Blob) for redaction. The optional `output_uri` controls where the redacted file is written; if it is omitted, the result is available via `jobs.download()`.
94
+
95
+ ```python
96
+ from expunct import Expunct
97
+
98
+ client = Expunct(api_key="your-api-key")
99
+
100
+ job = client.sanitize_uri(
101
+ "s3://my-bucket/reports/q1.pdf",
102
+ output_uri="s3://my-bucket/reports/q1_redacted.pdf",
103
+ )
104
+ print(job.status) # "completed"
105
+ print(job.findings_count) # number of PII items found
106
+ ```
107
+
108
+ ### Batch URI redaction
109
+
110
+ Enqueue multiple files in one call via the lower-level `redact.batch()` method, then poll the batch status:
111
+
112
+ ```python
113
+ from expunct import Expunct
114
+
115
+ client = Expunct(api_key="your-api-key")
116
+
117
+ batch = client.redact.batch(
118
+ input_uris=[
119
+ "s3://my-bucket/docs/file1.pdf",
120
+ "s3://my-bucket/docs/file2.pdf",
121
+ ],
122
+ language="en",
123
+ )
124
+ print(batch.id, batch.total_jobs)
125
+
126
+ # Poll progress
127
+ status = client.batch.get(batch.id)
128
+ print(status.completed_jobs, status.failed_jobs)
129
+ ```
130
+
131
+ ### Environment variable
132
+
133
+ Set `EXPUNCT_API_KEY` to avoid passing the key in code. The client reads it automatically when no `api_key` argument is provided — or you can read it yourself:
134
+
135
+ ```python
136
+ import os
137
+ from expunct import Expunct
138
+
139
+ client = Expunct(api_key=os.environ["EXPUNCT_API_KEY"])
140
+ ```
141
+
142
+ ### Custom policy
143
+
144
+ Policies let you control which entity types are detected, the redaction method, confidence thresholds, and more. Create a policy once and reference it by ID on every job.
145
+
146
+ ```python
147
+ from expunct import Expunct, PolicyCreate
148
+
149
+ client = Expunct(api_key="your-api-key")
150
+
151
+ # Create a policy that only redacts PII and uses pseudonymization
152
+ policy = client.policies.create(PolicyCreate(
153
+ name="pii-only-pseudonymize",
154
+ pii_categories=["PII"],
155
+ redaction_method="pseudonymization",
156
+ confidence_threshold=0.7,
157
+ ))
158
+
159
+ # Use the policy when uploading a file
160
+ job = client.redact.file("report.pdf", policy_id=policy.id)
161
+ completed = client.wait_for_job(job.id)
162
+ redacted_bytes = client.jobs.download(completed.id)
163
+ ```
164
+
165
+ ### Inspecting findings
166
+
167
+ Every completed job exposes the PII entities that were found:
168
+
169
+ ```python
170
+ from expunct import Expunct
171
+
172
+ client = Expunct(api_key="your-api-key")
173
+
174
+ redacted_bytes = client.sanitize_file("form.pdf")
175
+
176
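+ # sanitize_file() returns only the redacted bytes, so re-fetch the job to inspect findings (assumes the newest job is listed first — confirm)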
177
+ jobs = client.jobs.list(page=1, page_size=1)
178
+ detail = client.jobs.get(jobs.jobs[0].id)
179
+
180
+ for finding in detail.findings:
181
+ print(finding.entity_type, finding.confidence, finding.entity_value)
182
+ ```
183
+
184
+ ### Error handling
185
+
186
+ ```python
187
+ from expunct import Expunct, AuthenticationError, RateLimitError, PollingTimeoutError
188
+
189
+ client = Expunct(api_key="your-api-key")
190
+
191
+ try:
192
+ redacted = client.sanitize_text("Alice, SSN 219-09-9999")
193
+ except AuthenticationError:
194
+ print("Invalid API key")
195
+ except RateLimitError as e:
196
+ print(f"Rate limited — retry after {e.retry_after}s")
197
+ except PollingTimeoutError as e:
198
+ print(f"Job {e.job_id} timed out after {e.timeout}s")
199
+ ```
200
+
201
+ ### Context manager (sync)
202
+
203
+ ```python
204
+ from expunct import Expunct
205
+
206
+ with Expunct(api_key="your-api-key") as client:
207
+ redacted = client.sanitize_text("John Smith, DOB 01/01/1980")
208
+ ```
209
+
210
+ ## Client reference
211
+
212
+ ### `Expunct` / `AsyncExpunct`
213
+
214
+ | Parameter | Type | Default | Description |
215
+ |---|---|---|---|
216
+ | `api_key` | `str` | required unless `EXPUNCT_API_KEY` is set | Your Expunct API key; read from the `EXPUNCT_API_KEY` environment variable when omitted |
217
+ | `base_url` | `str` | `https://api.expunct.ai` | Override for self-hosted or staging |
218
+ | `tenant_id` | `str \| None` | `None` | Multi-tenant isolation header |
219
+ | `timeout` | `float` | `30.0` | Per-request timeout in seconds |
220
+ | `max_retries` | `int` | `3` | Automatic retries on transient errors |
221
+
222
+ ### Convenience methods
223
+
224
+ | Method | Returns | Description |
225
+ |---|---|---|
226
+ | `sanitize_text(text, *, language)` | `str` | Redact text in one call (upload → poll → decode) |
227
+ | `sanitize_file(file, *, language, dest)` | `bytes` | Upload a file, poll, return redacted bytes |
228
+ | `sanitize_uri(input_uri, *, language, output_uri)` | `JobDetailResponse` | Submit a URI, poll, return completed job |
229
+ | `wait_for_job(job_id, *, interval, timeout)` | `JobDetailResponse` | Poll a job until it completes or times out |
230
+
231
+ ### Resource methods
232
+
233
+ #### `client.redact`
234
+
235
+ | Method | Returns | Description |
236
+ |---|---|---|
237
+ | `redact.file(file, *, config, language, policy_id)` | `JobResponse` | Upload a file and enqueue a redaction job |
238
+ | `redact.uri(input_uri, *, output_uri, config, language, metadata)` | `JobResponse` | Submit a cloud URI for redaction |
239
+ | `redact.batch(input_uris, *, config, language, metadata)` | `BatchJobResponse` | Submit multiple URIs as a batch |
240
+
241
+ #### `client.jobs`
242
+
243
+ | Method | Returns | Description |
244
+ |---|---|---|
245
+ | `jobs.list(*, page, page_size, status)` | `JobListResponse` | List jobs with optional status filter |
246
+ | `jobs.get(job_id)` | `JobDetailResponse` | Get job detail including findings |
247
+ | `jobs.report(job_id)` | `dict` | Get full structured report for a job |
248
+ | `jobs.download(job_id, *, dest)` | `bytes` | Download redacted output; optionally save to `dest` |
249
+
250
+ #### `client.policies`
251
+
252
+ | Method | Returns | Description |
253
+ |---|---|---|
254
+ | `policies.list()` | `list[PolicyResponse]` | List all policies |
255
+ | `policies.create(policy)` | `PolicyResponse` | Create a new policy |
256
+ | `policies.get(policy_id)` | `PolicyResponse` | Fetch a policy by ID |
257
+ | `policies.update(policy_id, policy)` | `PolicyResponse` | Update a policy |
258
+ | `policies.delete(policy_id)` | `None` | Delete a policy |
259
+
260
+ #### `client.batch`
261
+
262
+ | Method | Returns | Description |
263
+ |---|---|---|
264
+ | `batch.get(batch_id)` | `BatchJobResponse` | Get status of a batch job |
265
+
266
+ #### `client.api_keys`
267
+
268
+ | Method | Returns | Description |
269
+ |---|---|---|
270
+ | `api_keys.list()` | `list[ApiKeyResponse]` | List API keys for your account |
271
+ | `api_keys.create(key)` | `ApiKeyCreateResponse` | Create a new API key |
272
+ | `api_keys.revoke(key_id)` | `dict` | Revoke an API key |
273
+
274
+ #### `client.audit`
275
+
276
+ | Method | Returns | Description |
277
+ |---|---|---|
278
+ | `audit.list(*, page, page_size, event_type)` | `AuditListResponse` | List audit log entries |
279
+
280
+ ## Detected Entity Types
281
+
282
+ Expunct detects the following entity types by default (all categories enabled):
283
+
284
+ **PII (Personally Identifiable Information)**
285
+
286
+ | Type | Example |
287
+ |---|---|
288
+ | `PERSON` | John Smith |
289
+ | `EMAIL_ADDRESS` | john@example.com |
290
+ | `PHONE_NUMBER` | 415-555-0100 |
291
+ | `LOCATION` | San Francisco, CA |
292
+ | `DATE_TIME` | January 1, 1990 |
293
+ | `NRP` | American, French (nationalities, religions, political groups) |
294
+ | `ORGANIZATION` | Acme Corp |
295
+ | `URL` | https://example.com |
296
+ | `IP_ADDRESS` | 192.168.1.1 |
297
+ | `US_DRIVER_LICENSE` | D1234567 |
298
+ | `US_PASSPORT` | 123456789 |
299
+ | `US_ITIN` | 900-70-0000 |
300
+
301
+ **PCI (Payment Card Industry)**
302
+
303
+ | Type | Example |
304
+ |---|---|
305
+ | `CREDIT_CARD` | 4111 1111 1111 1111 |
306
+ | `US_BANK_NUMBER` | 123456789 |
307
+ | `IBAN_CODE` | GB29NWBK60161331926819 |
308
+ | `CRYPTO` | 1BoatSLRHtKNngkdXEeobR76b53LETtpyT |
309
+ | `CVV` | 123 |
310
+ | `EXPIRY_DATE` | 12/26 |
311
+ | `CARD_HOLDER_NAME` | J. Smith |
312
+ | `PIN_NUMBER` | 1234 |
313
+ | `ACCOUNT_NUMBER` | 000123456789 |
314
+
315
+ **PHI (Protected Health Information)**
316
+
317
+ | Type | Example |
318
+ |---|---|
319
+ | `US_SSN` | 219-09-9999 |
320
+ | `MEDICAL_LICENSE` | A1234567 |
321
+
322
+ You can restrict detection to specific types using a `RedactConfig` or by setting `pii_types` on a policy:
323
+
324
+ ```python
325
+ from expunct import Expunct, RedactConfig
326
+
327
+ client = Expunct(api_key="your-api-key")
328
+
329
+ config = RedactConfig(
330
+ pii_types=["PERSON", "EMAIL_ADDRESS", "US_SSN"],
331
+ redaction_method="blur",
332
+ confidence_threshold=0.6,
333
+ )
334
+ job = client.redact.file("document.pdf", config=config.model_dump())
335
+ ```
336
+
337
+ ## Exceptions
338
+
339
+ | Exception | Raised when |
340
+ |---|---|
341
+ | `AuthenticationError` | API key is invalid or expired (401/403) |
342
+ | `NotFoundError` | Job or resource not found (404) |
343
+ | `ValidationError` | Request payload is invalid (422) |
344
+ | `RateLimitError` | Rate limit exceeded after retries (429) |
345
+ | `PollingTimeoutError` | `wait_for_job` exceeded the timeout |
346
+ | `ApiError` | Base class for all SDK errors |
347
+
348
+ ## Links
349
+
350
+ - [Documentation](https://docs.expunct.ai)
351
+ - [API Reference](https://docs.expunct.ai/api-reference)
352
+ - [Sign up free](https://expunct.ai)
353
+ - [GitHub](https://github.com/expunct/python-sdk)
354
+
355
+ ## License
356
+
357
+ MIT