@orderful/droid 0.45.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +8 -2
- package/.github/workflows/claude-issue-agent.yml +1 -2
- package/CHANGELOG.md +14 -0
- package/dist/tools/droid/.claude-plugin/plugin.json +1 -1
- package/dist/tools/droid/TOOL.yaml +1 -1
- package/dist/tools/droid/skills/droid/SKILL.md +1 -0
- package/dist/tools/droid/skills/droid-bootstrap/SKILL.md +1 -0
- package/dist/tools/edi-schema/.claude-plugin/plugin.json +25 -0
- package/dist/tools/edi-schema/TOOL.yaml +29 -0
- package/dist/tools/edi-schema/agents/edi-schema-agent.md +97 -0
- package/dist/tools/edi-schema/commands/edi-schema.md +33 -0
- package/dist/tools/edi-schema/skills/edi-schema/SKILL.md +86 -0
- package/dist/tools/pii/.claude-plugin/plugin.json +25 -0
- package/dist/tools/pii/TOOL.yaml +22 -0
- package/dist/tools/pii/agents/pii-scanner.md +85 -0
- package/dist/tools/pii/commands/pii.md +33 -0
- package/dist/tools/pii/skills/pii/SKILL.md +97 -0
- package/dist/tools/pii/skills/pii/references/supported-entities.md +90 -0
- package/dist/tools/pii/skills/pii/scripts/presidio-analyze.d.ts +18 -0
- package/dist/tools/pii/skills/pii/scripts/presidio-analyze.d.ts.map +1 -0
- package/dist/tools/pii/skills/pii/scripts/presidio-analyze.ts +258 -0
- package/dist/tools/pii/skills/pii/scripts/presidio-init.d.ts +17 -0
- package/dist/tools/pii/skills/pii/scripts/presidio-init.d.ts.map +1 -0
- package/dist/tools/pii/skills/pii/scripts/presidio-init.ts +151 -0
- package/dist/tools/pii/skills/pii/scripts/presidio-redact.d.ts +21 -0
- package/dist/tools/pii/skills/pii/scripts/presidio-redact.d.ts.map +1 -0
- package/dist/tools/pii/skills/pii/scripts/presidio-redact.ts +294 -0
- package/dist/tools/pii/skills/pii/scripts/presidio.test.ts +444 -0
- package/package.json +1 -1
- package/src/tools/droid/.claude-plugin/plugin.json +1 -1
- package/src/tools/droid/TOOL.yaml +1 -1
- package/src/tools/droid/skills/droid/SKILL.md +1 -0
- package/src/tools/droid/skills/droid-bootstrap/SKILL.md +1 -0
- package/src/tools/edi-schema/.claude-plugin/plugin.json +25 -0
- package/src/tools/edi-schema/TOOL.yaml +29 -0
- package/src/tools/edi-schema/agents/edi-schema-agent.md +97 -0
- package/src/tools/edi-schema/commands/edi-schema.md +33 -0
- package/src/tools/edi-schema/skills/edi-schema/SKILL.md +86 -0
- package/src/tools/pii/.claude-plugin/plugin.json +25 -0
- package/src/tools/pii/TOOL.yaml +22 -0
- package/src/tools/pii/agents/pii-scanner.md +85 -0
- package/src/tools/pii/commands/pii.md +33 -0
- package/src/tools/pii/skills/pii/SKILL.md +97 -0
- package/src/tools/pii/skills/pii/references/supported-entities.md +90 -0
- package/src/tools/pii/skills/pii/scripts/presidio-analyze.ts +258 -0
- package/src/tools/pii/skills/pii/scripts/presidio-init.ts +151 -0
- package/src/tools/pii/skills/pii/scripts/presidio-redact.ts +294 -0
- package/src/tools/pii/skills/pii/scripts/presidio.test.ts +444 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Supported PII Entity Types
|
|
2
|
+
|
|
3
|
+
Reference for all entity types detectable by Microsoft Presidio (used by the `/pii` skill).
|
|
4
|
+
|
|
5
|
+
Pass these names to `--entities` to filter detection:
|
|
6
|
+
```bash
|
|
7
|
+
/pii redact notes.md --entities EMAIL_ADDRESS,PHONE_NUMBER
|
|
8
|
+
/pii scan transcript.md --entities US_SSN,CREDIT_CARD
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Global Entities
|
|
14
|
+
|
|
15
|
+
These entity types are supported across all languages and locales.
|
|
16
|
+
|
|
17
|
+
| Entity | Description | Example | Detection Method |
|
|
18
|
+
|--------|-------------|---------|-----------------|
|
|
19
|
+
| `PERSON` | Full or partial personal names | John Smith, Jane | NER (spaCy) |
|
|
20
|
+
| `EMAIL_ADDRESS` | Email addresses | user@example.com | Regex + validation |
|
|
21
|
+
| `PHONE_NUMBER` | Phone numbers (various formats) | +1 555-123-4567, (555) 123-4567 | Regex |
|
|
22
|
+
| `CREDIT_CARD` | Credit card numbers (Luhn-validated) | 4111 1111 1111 1111 | Regex + Luhn algorithm |
|
|
23
|
+
| `IBAN_CODE` | International Bank Account Numbers | GB29 NWBK 6016 1331 9268 19 | Regex + checksum |
|
|
24
|
+
| `IP_ADDRESS` | IPv4 and IPv6 addresses | 192.168.1.1, 2001:db8::1 | Regex |
|
|
25
|
+
| `LOCATION` | Geographic locations and place names | New York, London, 123 Main St | NER (spaCy) |
|
|
26
|
+
| `DATE_TIME` | Dates, times, and datetime values | 2024-01-15, January 15, 3:00 PM | NER (spaCy) |
|
|
27
|
+
| `NRP` | Nationality, religion, political group | American, Christian | NER (spaCy) |
|
|
28
|
+
| `MEDICAL_LICENSE` | Medical licence numbers | MD12345 | Regex |
|
|
29
|
+
| `URL` | Web URLs | https://example.com/path | Regex |
|
|
30
|
+
| `CRYPTO` | Cryptocurrency wallet addresses | 1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2 | Regex |
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## US Entities
|
|
35
|
+
|
|
36
|
+
| Entity | Description | Example | Detection Method |
|
|
37
|
+
|--------|-------------|---------|-----------------|
|
|
38
|
+
| `US_SSN` | US Social Security Numbers | 123-45-6789 | Regex |
|
|
39
|
+
| `US_PASSPORT` | US passport numbers | A12345678 | Regex |
|
|
40
|
+
| `US_ITIN` | US Individual Taxpayer Identification Numbers | 912-00-0000 | Regex |
|
|
41
|
+
| `US_DRIVER_LICENSE` | US driver's licence numbers (state-specific) | A1234567 | Regex |
|
|
42
|
+
| `US_BANK_NUMBER` | US bank account numbers | 123456789012 | Regex |
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## UK Entities
|
|
47
|
+
|
|
48
|
+
| Entity | Description | Example | Detection Method |
|
|
49
|
+
|--------|-------------|---------|-----------------|
|
|
50
|
+
| `UK_NHS` | UK National Health Service numbers | 123 456 7890 | Regex + checksum |
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## European Entities
|
|
55
|
+
|
|
56
|
+
| Entity | Description | Example | Detection Method |
|
|
57
|
+
|--------|-------------|---------|-----------------|
|
|
58
|
+
| `ES_NIF` | Spanish NIF (tax ID) | 12345678A | Regex + checksum |
|
|
59
|
+
| `IT_FISCAL_CODE` | Italian fiscal code | RSSMRA85T10A562S | Regex + checksum |
|
|
60
|
+
| `IT_DRIVER_LICENSE` | Italian driver's licence | AA123456 | Regex |
|
|
61
|
+
| `IT_VAT_CODE` | Italian VAT code | IT12345678901 | Regex |
|
|
62
|
+
| `IT_PASSPORT` | Italian passport | AA1234567 | Regex |
|
|
63
|
+
| `IT_IDENTITY_CARD` | Italian identity card | CA12345AA | Regex |
|
|
64
|
+
| `PL_PESEL` | Polish PESEL national ID | 44051401458 | Regex + checksum |
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Asia-Pacific & Other Entities
|
|
69
|
+
|
|
70
|
+
| Entity | Description | Example | Detection Method |
|
|
71
|
+
|--------|-------------|---------|-----------------|
|
|
72
|
+
| `SG_NRIC_FIN` | Singapore NRIC/FIN | S1234567A | Regex + checksum |
|
|
73
|
+
| `AU_ABN` | Australian Business Number | 51 824 753 556 | Regex + checksum |
|
|
74
|
+
| `AU_ACN` | Australian Company Number | 004 085 616 | Regex + checksum |
|
|
75
|
+
| `AU_TFN` | Australian Tax File Number | 123 456 782 | Regex + checksum |
|
|
76
|
+
| `AU_MEDICARE` | Australian Medicare number | 2123456701 | Regex + checksum |
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Notes
|
|
81
|
+
|
|
82
|
+
- **NER-based** entities (PERSON, LOCATION, DATE_TIME, NRP) use the spaCy `en_core_web_lg` model (the model downloaded by `presidio-init.ts`). Accuracy depends on context — short, ambiguous names may be missed or misidentified.
|
|
83
|
+
- **Regex + checksum** entities are highly accurate — a CREDIT_CARD match always passes Luhn's algorithm.
|
|
84
|
+
- **Confidence scores** in `presidio-analyze.ts` output reflect detection certainty (0.0–1.0). Scores < 0.5 indicate uncertain matches.
|
|
85
|
+
- For custom recognizers (Orderful-specific patterns like API keys, account IDs), see the v2 roadmap in issue #292.
|
|
86
|
+
|
|
87
|
+
## References
|
|
88
|
+
|
|
89
|
+
- [Presidio supported entities documentation](https://microsoft.github.io/presidio/supported_entities/)
|
|
90
|
+
- [Adding custom recognizers](https://microsoft.github.io/presidio/analyzer/adding_recognizers/)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* presidio-analyze
|
|
4
|
+
*
|
|
5
|
+
* Detect PII in a file or text string using Presidio.
|
|
6
|
+
* Shells out to the bundled Python venv.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* bun run presidio-analyze.ts --file transcript.md
|
|
10
|
+
* bun run presidio-analyze.ts --text "Call me at 555-1234"
|
|
11
|
+
* bun run presidio-analyze.ts --file notes.md --entities EMAIL_ADDRESS,PHONE_NUMBER
|
|
12
|
+
*
|
|
13
|
+
* Output (JSON):
|
|
14
|
+
* { "success": true, "entities": [{ "type": "EMAIL_ADDRESS", "start": 10, "end": 25, "score": 0.85, "line": 3 }] }
|
|
15
|
+
* { "success": false, "error": "...", "init_required": true }
|
|
16
|
+
*/
|
|
17
|
+
export {};
|
|
18
|
+
//# sourceMappingURL=presidio-analyze.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"presidio-analyze.d.ts","sourceRoot":"","sources":["../../../../../../src/tools/pii/skills/pii/scripts/presidio-analyze.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;GAcG"}
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* presidio-analyze
|
|
4
|
+
*
|
|
5
|
+
* Detect PII in a file or text string using Presidio.
|
|
6
|
+
* Shells out to the bundled Python venv.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* bun run presidio-analyze.ts --file transcript.md
|
|
10
|
+
* bun run presidio-analyze.ts --text "Call me at 555-1234"
|
|
11
|
+
* bun run presidio-analyze.ts --file notes.md --entities EMAIL_ADDRESS,PHONE_NUMBER
|
|
12
|
+
*
|
|
13
|
+
* Output (JSON):
|
|
14
|
+
* { "success": true, "entities": [{ "type": "EMAIL_ADDRESS", "start": 10, "end": 25, "score": 0.85, "line": 3 }] }
|
|
15
|
+
* { "success": false, "error": "...", "init_required": true }
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { execSync } from 'child_process';
|
|
19
|
+
import { existsSync, mkdirSync, writeFileSync, unlinkSync, readFileSync } from 'fs';
|
|
20
|
+
import { join } from 'path';
|
|
21
|
+
import { tmpdir } from 'os';
|
|
22
|
+
|
|
23
|
+
/** Root of the Presidio Python venv under the user's home directory. */
const VENV_PATH = join(process.env.HOME || '', '.droid', 'runtimes', 'presidio');

/** Python interpreter inside the venv; its presence is the "initialised" check for analysis. */
const VENV_PYTHON = join(VENV_PATH, 'bin', 'python3');

/** Upper bound (50 MiB) on the output captured from the Python subprocess. */
const MAX_BUFFER_BYTES = 50 * 1024 * 1024;

/** Shape every entity name must have before it is interpolated into the Python script. */
const ENTITY_NAME_PATTERN = /^[A-Z0-9_]+$/;

/** Allow-list of entity names accepted by `--entities`, grouped by region. */
const SUPPORTED_ENTITIES = new Set<string>([
  // Global
  'PERSON',
  'EMAIL_ADDRESS',
  'PHONE_NUMBER',
  'CREDIT_CARD',
  'IBAN_CODE',
  'IP_ADDRESS',
  'LOCATION',
  'DATE_TIME',
  'NRP',
  'MEDICAL_LICENSE',
  'URL',
  'CRYPTO',
  // United States
  'US_SSN',
  'US_PASSPORT',
  'US_ITIN',
  'US_DRIVER_LICENSE',
  'US_BANK_NUMBER',
  // United Kingdom
  'UK_NHS',
  // Europe
  'ES_NIF',
  'IT_FISCAL_CODE',
  'IT_DRIVER_LICENSE',
  'IT_VAT_CODE',
  'IT_PASSPORT',
  'IT_IDENTITY_CARD',
  'PL_PESEL',
  // Asia-Pacific
  'SG_NRIC_FIN',
  'AU_ABN',
  'AU_ACN',
  'AU_TFN',
  'AU_MEDICARE',
]);

/** One detected PII span as emitted in the JSON output. */
interface Entity {
  /** Presidio entity type, e.g. `EMAIL_ADDRESS`. */
  type: string;
  /** Start offset of the match as reported by Presidio. */
  start: number;
  /** End offset of the match as reported by Presidio. */
  end: number;
  /** Detection score as reported by Presidio (rounded to 4 decimals by the Python script). */
  score: number;
  /** 1-based line number of the match start within the analysed text. */
  line: number;
}

/** JSON document printed by this script. */
interface AnalyzeResult {
  success: boolean;
  /** Present on success. */
  entities?: Entity[];
  /** Present on failure. */
  error?: string;
  /** Set when the venv is missing and `presidio-init.ts` must be run first. */
  init_required?: boolean;
}

/** Command-line options recognised by this script. */
interface ParsedArgs {
  /** Value of `--file`. */
  file?: string;
  /** Value of `--text`. */
  text?: string;
  /** Comma-separated value of `--entities`, split and trimmed. */
  entities?: string[];
}
|
|
80
|
+
|
|
81
|
+
/**
 * Parse the script's command-line flags.
 *
 * Recognises `--file`, `--text` and `--entities` (comma-separated, each item trimmed).
 * A flag is honoured only when a non-empty value follows it; that value is then consumed.
 * Anything else is skipped.
 *
 * @param args Arguments after the script name.
 * @returns The recognised options; absent flags are left undefined.
 */
function parseArgs(args: string[]): ParsedArgs {
  const parsed: ParsedArgs = {};
  let index = 0;

  while (index < args.length) {
    const flag = args[index];
    const value = args[index + 1];

    if (value) {
      switch (flag) {
        case '--file':
          parsed.file = value;
          index++; // skip the consumed value
          break;
        case '--text':
          parsed.text = value;
          index++;
          break;
        case '--entities':
          parsed.entities = value.split(',').map(name => name.trim());
          index++;
          break;
        default:
          break;
      }
    }

    index++;
  }

  return parsed;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Return the 1-based line number containing `offset` in `text`.
 *
 * Counts the newline characters in `text.slice(0, offset)` and adds one.
 */
function computeLineNumber(text: string, offset: number): number {
  const prefix = text.slice(0, offset);
  let line = 1;

  for (let pos = prefix.indexOf('\n'); pos !== -1; pos = prefix.indexOf('\n', pos + 1)) {
    line++;
  }

  return line;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Check the requested entity names before they are embedded in the Python script.
 *
 * @param entities Names supplied via `--entities`, if any.
 * @returns An error message for the first offending name, or `undefined` when the list is
 *          absent, empty, or entirely valid.
 */
function validateEntities(entities: string[] | undefined): string | undefined {
  for (const name of entities ?? []) {
    // Shape check first: only upper-case letters, digits and underscores are allowed.
    if (!ENTITY_NAME_PATTERN.test(name)) {
      return `Invalid entity type: ${name}. Allowed pattern: ${ENTITY_NAME_PATTERN.source}`;
    }

    // Then the allow-list of known entity types.
    if (!SUPPORTED_ENTITIES.has(name)) {
      return `Unsupported entity type: ${name}`;
    }
  }

  return undefined;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Run a shell command synchronously and capture its output.
 *
 * @param cmd Command line passed to `execSync`.
 * @returns `ok: true` with captured stdout on success; on failure `ok: false` with whatever
 *          stdout the error carries and stderr (falling back to the error message).
 */
function run(cmd: string): { ok: boolean; stdout: string; stderr: string } {
  let captured: string;

  try {
    captured = execSync(cmd, {
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
      maxBuffer: MAX_BUFFER_BYTES,
    });
  } catch (err: unknown) {
    const failure = err as { stdout?: string; stderr?: string; message?: string };
    const stderr = failure.stderr || failure.message || 'Unknown error';
    return { ok: false, stdout: failure.stdout || '', stderr };
  }

  return { ok: true, stdout: captured, stderr: '' };
}
|
|
138
|
+
|
|
139
|
+
/**
 * Convert an offset counted in Unicode code points (how Python indexes `str`, and therefore
 * how the Presidio results are expressed) into the equivalent UTF-16 code-unit offset used by
 * JavaScript string methods.
 */
function codePointToUtf16Offset(text: string, codePointOffset: number): number {
  let units = 0;
  for (let seen = 0; seen < codePointOffset && units < text.length; seen++) {
    const codePoint = text.codePointAt(units);
    // Characters outside the BMP occupy two UTF-16 code units.
    units += codePoint !== undefined && codePoint > 0xffff ? 2 : 1;
  }
  return units;
}

/**
 * Detect PII in a file or literal text by running Presidio inside the bundled Python venv.
 *
 * The input text is embedded in a generated Python script, written to a temporary file,
 * executed with the venv interpreter, and removed afterwards.
 *
 * @param parsed Parsed command-line options (`file` or `text` is required).
 * @returns `success: true` with the detected entities (each annotated with a 1-based line
 *          number), or `success: false` with an error message. `init_required` is set when
 *          the venv interpreter is missing.
 */
function presidioAnalyze(parsed: ParsedArgs): AnalyzeResult {
  // Validate venv exists
  if (!existsSync(VENV_PYTHON)) {
    return {
      success: false,
      error: 'Presidio venv not found. Run presidio-init.ts first.',
      init_required: true,
    };
  }

  // Validate input
  if (!parsed.file && !parsed.text) {
    return {
      success: false,
      error: 'Either --file or --text is required.',
    };
  }

  const entitiesError = validateEntities(parsed.entities);
  if (entitiesError) {
    return {
      success: false,
      error: entitiesError,
    };
  }

  // Read source text (also needed later for line number computation)
  let sourceText: string;
  if (parsed.file) {
    if (!existsSync(parsed.file)) {
      return { success: false, error: `File not found: ${parsed.file}` };
    }
    try {
      sourceText = readFileSync(parsed.file, 'utf-8');
    } catch (err: unknown) {
      const e = err as { message?: string };
      return { success: false, error: `Failed to read file: ${e.message}` };
    }
  } else {
    // Guaranteed non-empty here by the input validation above.
    sourceText = parsed.text ?? '';
  }

  // Build Python inline script. Entity names were validated against ENTITY_NAME_PATTERN and
  // the allow-list, so quoting them directly is safe; the text is passed as a JSON string
  // literal.
  const entitiesArg = parsed.entities && parsed.entities.length > 0
    ? `entities=[${parsed.entities.map(e => `"${e}"`).join(', ')}]`
    : '';

  const pythonScript = `
import sys, json
from presidio_analyzer import AnalyzerEngine

engine = AnalyzerEngine()
text = ${JSON.stringify(sourceText)}
results = engine.analyze(text=text, language='en'${entitiesArg ? ', ' + entitiesArg : ''})
output = []
for r in results:
    output.append({
        'type': r.entity_type,
        'start': r.start,
        'end': r.end,
        'score': round(r.score, 4)
    })
print(json.dumps(output))
`.trim();

  // Write tmp script. It contains the raw (possibly PII-bearing) input, so use an
  // unpredictable name, create it exclusively ('wx' fails if the path already exists), and
  // restrict it to the current user.
  const tmpDir = tmpdir();
  const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
  const tmpScript = join(tmpDir, `pii-analyze-${uniqueSuffix}.py`);

  try {
    mkdirSync(tmpDir, { recursive: true });
    writeFileSync(tmpScript, pythonScript, { encoding: 'utf-8', mode: 0o600, flag: 'wx' });
  } catch (err: unknown) {
    const e = err as { message?: string };
    return { success: false, error: `Failed to write temp script: ${e.message}` };
  }

  try {
    const result = run(`"${VENV_PYTHON}" "${tmpScript}"`);

    if (!result.ok) {
      return {
        success: false,
        error: `Presidio analysis failed: ${result.stderr}`,
      };
    }

    let rawEntities: Array<{ type: string; start: number; end: number; score: number }>;
    try {
      const decoded: unknown = JSON.parse(result.stdout.trim());
      if (!Array.isArray(decoded)) {
        throw new Error('Presidio output is not a JSON array');
      }
      rawEntities = decoded as Array<{ type: string; start: number; end: number; score: number }>;
    } catch {
      return { success: false, error: `Failed to parse Presidio output: ${result.stdout}` };
    }

    // Compute line numbers. `start`/`end` are passed through unchanged; only the lookup
    // offset is converted, because Python counts code points while JS strings count UTF-16
    // code units (they diverge after characters such as emoji).
    const entities: Entity[] = rawEntities.map(e => ({
      ...e,
      line: computeLineNumber(sourceText, codePointToUtf16Offset(sourceText, e.start)),
    }));

    return { success: true, entities };
  } finally {
    // Clean up tmp file
    try {
      unlinkSync(tmpScript);
    } catch {
      // Ignore cleanup errors
    }
  }
}
|
|
249
|
+
|
|
250
|
+
// Entry point: parse CLI flags, run the analysis, print the JSON result to stdout,
// and exit non-zero on failure.
const args = process.argv.slice(2);
const parsed = parseArgs(args);
const result = presidioAnalyze(parsed);

const rendered = JSON.stringify(result, null, 2);
console.log(rendered);

if (result.success === false) {
  process.exit(1);
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* presidio-init
|
|
4
|
+
*
|
|
5
|
+
* One-time bootstrap for the Presidio Python venv.
|
|
6
|
+
* Idempotent: no-op if already initialised (marker file check + venv binary check).
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* bun run presidio-init.ts
|
|
10
|
+
*
|
|
11
|
+
* Output (JSON):
|
|
12
|
+
* { "success": true, "already_existed": true }
|
|
13
|
+
* { "success": true, "initialized": true, "python_path": "...", "venv_path": "..." }
|
|
14
|
+
* { "success": false, "error": "..." }
|
|
15
|
+
*/
|
|
16
|
+
export {};
|
|
17
|
+
//# sourceMappingURL=presidio-init.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"presidio-init.d.ts","sourceRoot":"","sources":["../../../../../../src/tools/pii/skills/pii/scripts/presidio-init.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;GAaG"}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* presidio-init
|
|
4
|
+
*
|
|
5
|
+
* One-time bootstrap for the Presidio Python venv.
|
|
6
|
+
* Idempotent: no-op if already initialised (marker file check + venv binary check).
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* bun run presidio-init.ts
|
|
10
|
+
*
|
|
11
|
+
* Output (JSON):
|
|
12
|
+
* { "success": true, "already_existed": true }
|
|
13
|
+
* { "success": true, "initialized": true, "python_path": "...", "venv_path": "..." }
|
|
14
|
+
* { "success": false, "error": "..." }
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { execSync } from 'child_process';
|
|
18
|
+
import { existsSync, mkdirSync, writeFileSync } from 'fs';
|
|
19
|
+
import { join } from 'path';
|
|
20
|
+
|
|
21
|
+
/** Root of the Presidio Python venv under the user's home directory. */
const VENV_PATH = join(process.env.HOME || '', '.droid', 'runtimes', 'presidio');

/** Python interpreter inside the venv. */
const VENV_PYTHON = join(VENV_PATH, 'bin', 'python3');

/** Marker written after a successful bootstrap; checked together with VENV_PYTHON. */
const MARKER_FILE = join(VENV_PATH, '.droid-initialized');

/** JSON document printed by this script. */
interface InitResult {
  success: boolean;
  /** Set when the venv was already initialised and nothing was done. */
  already_existed?: boolean;
  /** Set when this run performed the bootstrap. */
  initialized?: boolean;
  /** Path of the venv interpreter (reported after a fresh bootstrap). */
  python_path?: string;
  /** Path of the venv directory (reported after a fresh bootstrap). */
  venv_path?: string;
  /** Present on failure. */
  error?: string;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Run a shell command synchronously.
 *
 * @param cmd  Command line passed to `execSync`.
 * @param opts `visible`: stream the child's output to the terminal instead of capturing it;
 *             `cwd`: working directory for the child.
 * @returns `ok: true` with the trimmed captured stdout (empty when `visible`), or
 *          `ok: false` with stderr, falling back to the error message.
 */
function run(
  cmd: string,
  opts: { visible?: boolean; cwd?: string } = {}
): { ok: boolean; output: string } {
  try {
    const output = execSync(cmd, {
      cwd: opts.cwd,
      encoding: 'utf-8',
      // In visible mode the child's stdout is routed to our stderr (fd 2) rather than
      // inherited: this script's own stdout must contain only the final JSON result, and
      // tools such as pip / spaCy download print progress on stdout.
      stdio: opts.visible ? ['inherit', 2, 'inherit'] : ['pipe', 'pipe', 'pipe'],
    });
    // execSync returns no string when stdout is not piped.
    return { ok: true, output: typeof output === 'string' ? output.trim() : '' };
  } catch (err: unknown) {
    const error = err as { stderr?: string; message?: string };
    return { ok: false, output: error.stderr || error.message || 'Unknown error' };
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Verify that `python3` is on PATH and reports version 3.8 or newer.
 *
 * @returns `ok: true` with the reported version string, or `ok: false` with an explanatory
 *          error (not found, unparseable version, or too old).
 */
function checkPythonVersion(): { ok: boolean; version?: string; error?: string } {
  const probe = run('python3 --version');
  if (!probe.ok) {
    return { ok: false, error: 'python3 not found. Install Python 3.8+ from python.org or via: brew install python3' };
  }

  const parts = /Python (\d+)\.(\d+)/.exec(probe.output);
  if (parts === null) {
    return { ok: false, error: `Could not parse Python version from: ${probe.output}` };
  }

  const major = parseInt(parts[1], 10);
  const minor = parseInt(parts[2], 10);
  const reported = probe.output.trim();
  const meetsMinimum = major > 3 || (major === 3 && minor >= 8);

  if (meetsMinimum) {
    return { ok: true, version: reported };
  }

  return {
    ok: false,
    error: `Python 3.8+ required, found ${reported}. Upgrade via: brew install python3`,
  };
}
|
|
74
|
+
|
|
75
|
+
/**
 * Bootstrap the Presidio Python venv.
 *
 * Returns immediately when both the marker file and the venv interpreter already exist.
 * Otherwise checks the system Python, creates the runtimes directory, then runs the setup
 * commands in order (create venv, upgrade pip, install Presidio packages, download the spaCy
 * model), stopping at the first failure. The marker file is written only after every step
 * succeeds.
 *
 * @returns The JSON-serialisable outcome of the bootstrap.
 */
function presidioInit(): InitResult {
  // Fast path: nothing to do when a previous run completed.
  const alreadyBootstrapped = existsSync(MARKER_FILE) && existsSync(VENV_PYTHON);
  if (alreadyBootstrapped) {
    return { success: true, already_existed: true };
  }

  // The system interpreter must be usable before any setup is attempted.
  const pythonCheck = checkPythonVersion();
  if (!pythonCheck.ok) {
    return { success: false, error: pythonCheck.error };
  }

  // Parent directory of the venv.
  try {
    mkdirSync(join(process.env.HOME || '', '.droid', 'runtimes'), { recursive: true });
  } catch (err: unknown) {
    const reason = (err as { message?: string }).message;
    return { success: false, error: `Failed to create runtimes directory: ${reason}` };
  }

  // Setup steps, executed in order with their output shown to the user.
  const steps: Array<{ announce: string; command: string; failure: string }> = [
    {
      announce: '[pii] Creating Python venv (first run — this takes ~2–3 min)...',
      command: `python3 -m venv "${VENV_PATH}"`,
      failure: 'Failed to create venv',
    },
    {
      announce: '[pii] Upgrading pip...',
      command: `"${VENV_PYTHON}" -m pip install --quiet --upgrade pip`,
      failure: 'Failed to upgrade pip',
    },
    {
      announce: '[pii] Installing presidio-analyzer, presidio-anonymizer, spacy...',
      command: `"${VENV_PYTHON}" -m pip install --quiet presidio-analyzer presidio-anonymizer spacy`,
      failure: 'Failed to install Presidio',
    },
    {
      // ~400 MB model used by Presidio's default NLP engine.
      announce: '[pii] Downloading spaCy en_core_web_lg model (~400 MB)...',
      command: `"${VENV_PYTHON}" -m spacy download en_core_web_lg`,
      failure: 'Failed to download spaCy model',
    },
  ];

  for (const step of steps) {
    console.error(step.announce);
    const outcome = run(step.command, { visible: true });
    if (!outcome.ok) {
      return { success: false, error: `${step.failure}: ${outcome.output}` };
    }
  }

  // Record completion so later runs take the fast path.
  try {
    writeFileSync(MARKER_FILE, new Date().toISOString());
  } catch (err: unknown) {
    const reason = (err as { message?: string }).message;
    return { success: false, error: `Failed to write marker file: ${reason}` };
  }

  return {
    success: true,
    initialized: true,
    python_path: VENV_PYTHON,
    venv_path: VENV_PATH,
  };
}
|
|
144
|
+
|
|
145
|
+
// Entry point: run the bootstrap, print the JSON result to stdout, and exit non-zero on failure.
const result = presidioInit();

const rendered = JSON.stringify(result, null, 2);
console.log(rendered);

if (result.success === false) {
  process.exit(1);
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* presidio-redact
|
|
4
|
+
*
|
|
5
|
+
* Redact PII in a file using Presidio.
|
|
6
|
+
* Shells out to the bundled Python venv.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* bun run presidio-redact.ts --file transcript.md
|
|
10
|
+
* bun run presidio-redact.ts --file transcript.md --output clean.md
|
|
11
|
+
* bun run presidio-redact.ts --file transcript.md --dry-run
|
|
12
|
+
* bun run presidio-redact.ts --file transcript.md --entities EMAIL_ADDRESS,PHONE_NUMBER
|
|
13
|
+
* bun run presidio-redact.ts --file transcript.md --mask
|
|
14
|
+
*
|
|
15
|
+
* Output (JSON):
|
|
16
|
+
* { "success": true, "dry_run": false, "original_path": "...", "output_path": "...", "entities_found": 3, "entities_redacted": 3 }
|
|
17
|
+
* { "success": true, "dry_run": true, "original_path": "...", "entities_found": 3, "entities_redacted": 3, "redacted_text": "..." }
|
|
18
|
+
* { "success": false, "error": "..." }
|
|
19
|
+
*/
|
|
20
|
+
export {};
|
|
21
|
+
//# sourceMappingURL=presidio-redact.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"presidio-redact.d.ts","sourceRoot":"","sources":["../../../../../../src/tools/pii/skills/pii/scripts/presidio-redact.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;;;;;GAiBG"}
|