skillware 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skillware-0.2.2/skillware.egg-info → skillware-0.2.3}/PKG-INFO +1 -1
- {skillware-0.2.2 → skillware-0.2.3}/pyproject.toml +1 -1
- skillware-0.2.3/skills/compliance/pii_masker/__init__.py +3 -0
- skillware-0.2.3/skills/compliance/pii_masker/skill.py +77 -0
- {skillware-0.2.2 → skillware-0.2.3/skillware.egg-info}/PKG-INFO +1 -1
- {skillware-0.2.2 → skillware-0.2.3}/skillware.egg-info/SOURCES.txt +2 -0
- {skillware-0.2.2 → skillware-0.2.3}/LICENSE +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/README.md +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/setup.cfg +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/data_engineering/synthetic_generator/__init__.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/data_engineering/synthetic_generator/skill.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/finance/wallet_screening/__init__.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/finance/wallet_screening/maintenance/normalization_tool.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/finance/wallet_screening/maintenance/normalize_uniswap_trm.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/finance/wallet_screening/skill.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/office/pdf_form_filler/skill.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/office/pdf_form_filler/utils.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/optimization/prompt_rewriter/__init__.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skills/optimization/prompt_rewriter/skill.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skillware/__init__.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skillware/core/__init__.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skillware/core/base_skill.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skillware/core/env.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skillware/core/loader.py +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skillware.egg-info/dependency_links.txt +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skillware.egg-info/requires.txt +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/skillware.egg-info/top_level.txt +0 -0
- {skillware-0.2.2 → skillware-0.2.3}/tests/test_loader.py +0 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import requests
|
|
3
|
+
from typing import Any, Dict, List, Tuple
|
|
4
|
+
from skillware.core.base_skill import BaseSkill
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class PIIMaskerSkill(BaseSkill):
|
|
8
|
+
"""
|
|
9
|
+
Skill to mask/redact PII from text using the arpacorp/micro-f1-mask model via Ollama.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
@property
def manifest(self) -> Dict[str, Any]:
    """Return the skill descriptor: its registry ``name`` and ``version``."""
    descriptor: Dict[str, Any] = {}
    descriptor["name"] = "compliance/pii_masker"
    descriptor["version"] = "0.1.0"
    return descriptor
|
|
18
|
+
|
|
19
|
+
def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Sanitize ``params["text"]`` and report which entity types were found.

    Args:
        params: Request options. Recognised keys:
            ``text`` (default ``""``): the text to sanitize. ``None`` is
            treated as an empty string.
            ``mode`` (default ``"mask"``): passed to ``_apply_mode``.
            ``ollama_url`` (default ``"http://localhost:11434"``): passed
            to ``_call_ollama``.

    Returns:
        A dict with ``sanitized_text`` and a ``metadata`` dict containing
        ``detected_entities`` (sorted, de-duplicated token names with any
        trailing ``_<number>`` removed), ``entity_count`` (number of tokens
        found, duplicates included), ``security_level`` and ``model``.
    """
    text = params.get("text", "")
    if text is None:
        # An explicit None would otherwise reach the regex scan and raise.
        text = ""
    mode = params.get("mode", "mask")
    ollama_url = params.get("ollama_url", "http://localhost:11434")

    sanitized_text, detected_entities = self._call_ollama(text, ollama_url)
    sanitized_text = self._apply_mode(sanitized_text, mode)

    # Unique entity types; sorted so the output does not depend on set
    # iteration order (which varies between interpreter runs for strings).
    entities = sorted({re.sub(r'_[0-9]+$', '', e) for e in detected_entities})

    return {
        "sanitized_text": sanitized_text,
        "metadata": {
            "detected_entities": entities,
            "entity_count": len(detected_entities),
            "security_level": "local-only",
            "model": "arpacorp/micro-f1-mask",
        },
    }
|
|
39
|
+
|
|
40
|
+
def _call_ollama(self, text: str, endpoint: str) -> Tuple[str, List[str]]:
    """Ask the Ollama ``/api/generate`` endpoint to mask ``text``.

    The request uses the ``arpacorp/micro-f1-mask`` model, non-streaming,
    with a 30 second timeout.

    This method is best-effort: if the request fails, the server answers
    with a non-200 status, or the body is not a JSON object carrying a
    string ``"response"``, the ORIGINAL (unmasked) text is returned and a
    warning is logged. In a strict environment, callers may prefer to block
    instead of using that fallback.

    Args:
        text: The text to send as the prompt.
        endpoint: Base URL of the Ollama server; a trailing ``/`` is ignored.

    Returns:
        ``(result_text, detected)`` where ``detected`` lists every
        ``[NAME]`` / ``[NAME_<number>]`` token found in ``result_text``
        (without the brackets, duplicates kept, in order of appearance).
    """
    import logging  # function-scope import: the module does not import logging

    log = logging.getLogger(__name__)
    result_text = text  # fallback unless a usable answer arrives

    try:
        response = requests.post(
            # Avoid "//api/generate" when the base URL ends with a slash.
            f"{str(endpoint).rstrip('/')}/api/generate",
            json={
                "model": "arpacorp/micro-f1-mask",
                "prompt": text,
                "stream": False,
            },
            timeout=30,
        )
        if response.status_code == 200:
            payload = response.json()
            candidate = payload.get("response") if isinstance(payload, dict) else None
            if isinstance(candidate, str):
                result_text = candidate
            else:
                log.warning(
                    "PII masker: unexpected Ollama response body; "
                    "returning text unmasked"
                )
        else:
            # Ollama reachable but unhappy (e.g. model missing).
            log.warning(
                "PII masker: Ollama returned HTTP %s; returning text unmasked",
                response.status_code,
            )
    except (requests.exceptions.RequestException, ValueError) as exc:
        # ValueError covers invalid JSON on requests versions where the
        # decode error is not a RequestException. The text itself is never
        # logged, since it may contain PII.
        log.warning(
            "PII masker: Ollama call failed (%s); returning text unmasked",
            type(exc).__name__,
        )

    # Detect entities in the response
    detected = re.findall(r'\[([A-Z_]+(?:_[0-9]+)?)\]', result_text)
    return result_text, detected
|
|
61
|
+
|
|
62
|
+
def _apply_mode(self, text: str, mode: str) -> str:
|
|
63
|
+
if mode == "mask":
|
|
64
|
+
return text
|
|
65
|
+
|
|
66
|
+
# Pattern to catch [DOCUMENT], [PERSON_1], etc.
|
|
67
|
+
pattern = r'\[[A-Z_]+(?:_[0-9]+)?\]'
|
|
68
|
+
if mode == "redact":
|
|
69
|
+
return re.sub(pattern, "XXXX", text)
|
|
70
|
+
elif mode == "remove":
|
|
71
|
+
# Replace token and any immediate preceding/following spaces safely
|
|
72
|
+
# A simple sub is sufficient. Cleaning up double spaces.
|
|
73
|
+
text = re.sub(pattern, "", text)
|
|
74
|
+
text = re.sub(r'\s+', ' ', text).strip()
|
|
75
|
+
return text
|
|
76
|
+
|
|
77
|
+
return text
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
LICENSE
|
|
2
2
|
README.md
|
|
3
3
|
pyproject.toml
|
|
4
|
+
skills/compliance/pii_masker/__init__.py
|
|
5
|
+
skills/compliance/pii_masker/skill.py
|
|
4
6
|
skills/data_engineering/synthetic_generator/__init__.py
|
|
5
7
|
skills/data_engineering/synthetic_generator/skill.py
|
|
6
8
|
skills/finance/wallet_screening/__init__.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|