skillware 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skillware-0.2.3/skillware.egg-info → skillware-0.2.4}/PKG-INFO +1 -1
- {skillware-0.2.3 → skillware-0.2.4}/pyproject.toml +1 -1
- skillware-0.2.4/skills/compliance/mica_module/__init__.py +3 -0
- skillware-0.2.4/skills/compliance/mica_module/skill.py +229 -0
- {skillware-0.2.3 → skillware-0.2.4/skillware.egg-info}/PKG-INFO +1 -1
- {skillware-0.2.3 → skillware-0.2.4}/skillware.egg-info/SOURCES.txt +4 -1
- skillware-0.2.4/tests/test_mica_module.py +66 -0
- {skillware-0.2.3 → skillware-0.2.4}/LICENSE +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/README.md +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/setup.cfg +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/compliance/pii_masker/__init__.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/compliance/pii_masker/skill.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/data_engineering/synthetic_generator/__init__.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/data_engineering/synthetic_generator/skill.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/finance/wallet_screening/__init__.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/finance/wallet_screening/maintenance/normalization_tool.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/finance/wallet_screening/maintenance/normalize_uniswap_trm.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/finance/wallet_screening/skill.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/office/pdf_form_filler/skill.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/office/pdf_form_filler/utils.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/optimization/prompt_rewriter/__init__.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skills/optimization/prompt_rewriter/skill.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skillware/__init__.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skillware/core/__init__.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skillware/core/base_skill.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skillware/core/env.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skillware/core/loader.py +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skillware.egg-info/dependency_links.txt +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skillware.egg-info/requires.txt +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/skillware.egg-info/top_level.txt +0 -0
- {skillware-0.2.3 → skillware-0.2.4}/tests/test_loader.py +0 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
import json
import logging
import os
import re
from typing import Any, Dict, List, Optional

import google.generativeai as genai

from skillware.core.base_skill import BaseSkill
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MiCAModuleSkill(BaseSkill):
    """
    Acts as a highly specialized, localized RAG and policy enforcement engine for MiCA.

    The skill loads a JSON corpus of MiCA articles that sits next to this
    module, scores each article against the user's prompt with a keyword
    router, and returns the best matches as context for an agent. Optionally
    a Gemini model is asked to grade a hypothetical answer against the
    retrieved rules.
    """

    # Parsed contents of mica_corpus.json, shared by every instance.
    # None means "not loaded yet"; an empty list means loading was attempted
    # and produced nothing usable.
    _corpus_cache: Optional[List[Dict[str, Any]]] = None

    # Upper bound on the number of articles the router hands back.
    _MAX_ROUTED_ARTICLES = 10

    # Prompt rewrites applied before matching: US -> UK spellings used by the
    # regulation text, plus one high-level intent mapping.
    _PROMPT_REPLACEMENTS = {
        "authorization": "authorisation",
        "authorize": "authorise",
        "organization": "organisation",
        "crypto asset": "crypto-asset",
        "stablecoin": "asset-referenced token",  # High-level intent mapping
    }

    # Punctuation removed from the prompt before it is split into words.
    _PROMPT_PUNCTUATION = str.maketrans("", "", "?.,!;:")

    # Instruction prepended to the retrieved rules when no evaluator rewrote it.
    _DEFAULT_CONTEXT_HEADER = (
        "Output your final answer seamlessly integrating and adhering to "
        "these MiCA rules:\n"
    )

    @property
    def manifest(self) -> Dict[str, Any]:
        """Return the static name/version descriptor of this skill."""
        return {"name": "compliance/mica_module", "version": "0.1.0"}

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialise the base skill and make sure the corpus is loaded."""
        super().__init__(config)
        self._ensure_corpus_loaded()

    def _ensure_corpus_loaded(self) -> None:
        """
        Load the MiCA JSON corpus into the class-level cache once.

        Loading is best-effort: if the file cannot be read or parsed, or does
        not contain a JSON array, the problem is logged and the cache is set
        to an empty list so later calls simply find no matching articles.
        """
        if MiCAModuleSkill._corpus_cache is not None:
            return

        log = logging.getLogger(__name__)
        corpus_path = os.path.join(os.path.dirname(__file__), "mica_corpus.json")
        try:
            with open(corpus_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            log.warning("Error loading MiCA corpus: %s", e)
            loaded = []

        if not isinstance(loaded, list):
            log.warning(
                "Error loading MiCA corpus: expected a JSON array, got %s",
                type(loaded).__name__,
            )
            loaded = []

        # The router calls .get() on every entry, so keep only JSON objects.
        MiCAModuleSkill._corpus_cache = [
            entry for entry in loaded if isinstance(entry, dict)
        ]

    def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Retrieve the MiCA articles relevant to a prompt and build agent context.

        Args:
            params: Recognised keys are ``user_prompt`` (text to route),
                ``run_evaluator`` (call the Gemini evaluator when articles
                were found; default False) and ``evaluator_model`` (model
                name, default ``"gemini-2.5-flash-lite"``).

        Returns:
            A dict with ``retrieved_sections`` (labels of the matched
            articles, best match first), ``policy_status``,
            ``gemini_evaluator_feedback`` and ``final_context_for_agent``.
        """
        user_prompt = str(params.get("user_prompt") or "")
        run_evaluator = params.get("run_evaluator", False)
        evaluator_model = params.get("evaluator_model", "gemini-2.5-flash-lite")

        # 1. Use the cached corpus (loading it first if __init__ was bypassed).
        self._ensure_corpus_loaded()
        mica_data = MiCAModuleSkill._corpus_cache or []

        # 2. Extract intent and route to matched sections.
        relevant_chunks = self._route_and_fetch(user_prompt, mica_data)

        # Format the retrieved sections list and the context handed to the agent.
        retrieved_sections = []
        context_parts = []
        for chunk in relevant_chunks:
            sec_name = self._section_label(chunk)
            retrieved_sections.append(sec_name)
            context_parts.append(
                f"\n--- {sec_name} ---\n{chunk.get('content', '')}\n"
            )
        context_text = "".join(context_parts)

        # 3. Default policy status and feedback when no evaluator runs.
        policy_status = "CAUTION"
        gemini_feedback = {
            "grade": "N/A",
            "holes_found": (
                "Evaluator disabled. Review MiCA context manually for regulatory holes."
            ),
            "suggestion": "Follow the integrated MiCA chunks exactly.",
        }

        if not retrieved_sections:
            final_context = "No specific MiCA articles matched the user query."
        else:
            final_context = f"{self._DEFAULT_CONTEXT_HEADER}{context_text}"

        # 4. Optional evaluator node; skipped when nothing was retrieved.
        if run_evaluator and relevant_chunks:
            eval_result = self._run_evaluator(
                user_prompt, context_text, evaluator_model
            )
            policy_status = eval_result.get("policy_status", policy_status)
            gemini_feedback = eval_result.get(
                "gemini_evaluator_feedback", gemini_feedback
            )
            final_context = eval_result.get("final_context_for_agent", final_context)

        return {
            # dict.fromkeys drops repeated labels while keeping relevance
            # order; a set would return them in arbitrary order.
            "retrieved_sections": list(dict.fromkeys(retrieved_sections)),
            "policy_status": policy_status,
            "gemini_evaluator_feedback": gemini_feedback,
            "final_context_for_agent": final_context,
        }

    @staticmethod
    def _section_label(chunk: Dict[str, Any]) -> str:
        """Build the ``"<title> | <article>"`` label for one corpus entry."""
        title_info = str(chunk.get("title_num") or "")
        if chunk.get("title_name"):
            title_info += f": {chunk.get('title_name')}"

        art_info = str(chunk.get("article_num") or "")
        if chunk.get("article_title"):
            art_info += f": {chunk.get('article_title')}"

        return f"{title_info} | {art_info}"

    @staticmethod
    def _mentions_article(normalized_prompt: str, art_num: str) -> bool:
        """
        Tell whether the prompt names the given article explicitly.

        ``art_num`` is the lower-cased corpus value and may or may not carry
        its own ``"article "`` prefix ("article 59" or just "59"). The match
        is anchored on both sides so that "article 5" is not found inside
        "article 59".
        """
        label = art_num.strip()
        if label.startswith("article "):
            label = label[len("article "):].strip()
        if not label:
            return False
        pattern = rf"(?<!\w)article\s+{re.escape(label)}(?!\w)"
        return re.search(pattern, normalized_prompt) is not None

    def _route_and_fetch(
        self, prompt: str, corpus: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Score every corpus article against the prompt and return the best ones.

        Lightweight keyword-overlap router to prevent huge context bloat.
        Scoring per article:

        * +100 when the prompt names the article explicitly ("article 59");
        * +20 for each article keyword contained in the prompt;
        * +10 when any significant prompt word occurs in the article title;
        * +5 x (len(word) / len(keyword)) for each significant prompt word
          contained in a keyword, so longer overlaps weigh more.

        Args:
            prompt: The raw user prompt.
            corpus: Article dicts; the keys read are ``keywords``,
                ``article_num``, ``article_title`` and ``title_num``.

        Returns:
            Articles with a positive score, best first, with at most one
            entry per (title_num, article_num) pair and at most
            ``_MAX_ROUTED_ARTICLES`` entries.
        """
        # Normalize common spelling variations (US to UK for the European regulation).
        normalized_prompt = (prompt or "").lower()
        for us, uk in self._PROMPT_REPLACEMENTS.items():
            normalized_prompt = normalized_prompt.replace(us, uk)

        # Only words longer than three characters count as significant; this
        # keeps stop words such as "the" or "of" out of the overlap scoring.
        prompt_words = [
            w
            for w in normalized_prompt.translate(self._PROMPT_PUNCTUATION).split()
            if len(w) > 3
        ]

        scored_matches = []
        for article in corpus:
            score = 0
            # Drop empty keywords: "" is a substring of every prompt and
            # would otherwise hand every article a free keyword hit.
            keywords = [
                str(k).lower() for k in (article.get("keywords") or []) if k
            ]
            art_num = str(article.get("article_num") or "").lower()
            art_title = str(article.get("article_title") or "").lower()

            # Match 1: Specific article mention (Highest Priority)
            if art_num and self._mentions_article(normalized_prompt, art_num):
                score += 100

            # Match 2: Exact keyword match in prompt
            score += 20 * sum(1 for k in keywords if k in normalized_prompt)

            # Match 3: Article title collision
            if any(w in art_title for w in prompt_words):
                score += 10

            # Match 4: Significant word collision with keywords.
            # Favor specificity: the closer the word is to the full keyword,
            # the more the collision counts.
            collision_count = sum(
                len(w) / max(len(k), 1)
                for w in prompt_words
                for k in keywords
                if w in k
            )
            score += collision_count * 5

            if score > 0:
                scored_matches.append((score, article))

        # Sort by score descending; the sort is stable, so ties keep corpus order.
        scored_matches.sort(key=lambda x: x[0], reverse=True)

        # Deduplicate on (title_num, article_num), keeping the best-scored copy.
        unique_matches = []
        seen = set()
        for _score, m in scored_matches:
            a_id = f"{m.get('title_num', '')}_{m.get('article_num', '')}"
            if a_id not in seen:
                unique_matches.append(m)
                seen.add(a_id)

        return unique_matches[: self._MAX_ROUTED_ARTICLES]

    def _run_evaluator(
        self, prompt: str, context: str, model_name: str
    ) -> Dict[str, Any]:
        """
        Ask a Gemini model to grade a hypothetical answer against the rules.

        Args:
            prompt: The user's query.
            context: The formatted MiCA articles retrieved for the query.
            model_name: Name passed to ``genai.GenerativeModel``.

        Returns:
            The JSON object produced by the model. If the call fails, or the
            reply is not a JSON object, a CAUTION fallback with the same keys
            is returned instead, so this method does not raise.
        """
        prompt_payload = f"""
        You are a MiCA Regulation Evaluator.
        User Query: {prompt}
        MiCA Rule Context from RAG: {context}

        Draft a response silently to see what an AI would say based on the user query.
        Then, evaluate that draft against the MiCA rules to see if it violates
        anything or misses vital compliance disclosures (like publishing a
        White Paper, authorization required, etc).
        Return exactly a JSON summarizing the grade and issues found.
        Schema:
        {{
            "policy_status": "APPROVED|CAUTION|HIGH_RISK_DETECTED",
            "gemini_evaluator_feedback": {{
                "grade": "<Letter Grade (A to F)>",
                "holes_found": "<Issues the drafted response missed>",
                "suggestion": "<How the agent should fix the holes in its final answer>"
            }},
            "final_context_for_agent": "Output instructions for the agent embedding your suggestion and the context."
        }}
        """

        try:
            model = genai.GenerativeModel(model_name)
            resp = model.generate_content(
                prompt_payload,
                generation_config=genai.GenerationConfig(
                    response_mime_type="application/json", temperature=0.0
                ),
            )
            parsed = json.loads(resp.text)
            # The caller reads the result with .get(); anything other than a
            # JSON object is treated as an evaluator failure.
            if not isinstance(parsed, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(parsed).__name__}"
                )
            return parsed
        except Exception as e:
            # Deliberately broad: any failure of the remote evaluator degrades
            # to the un-evaluated context instead of failing the whole skill.
            return {
                "policy_status": "CAUTION",
                "gemini_evaluator_feedback": {
                    "grade": "N/A",
                    "holes_found": f"Evaluator API failed or rate-limited: {str(e)}",
                    "suggestion": "Proceed manually integrating the extracted logic.",
                },
                "final_context_for_agent": f"{self._DEFAULT_CONTEXT_HEADER}{context}",
            }
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
LICENSE
|
|
2
2
|
README.md
|
|
3
3
|
pyproject.toml
|
|
4
|
+
skills/compliance/mica_module/__init__.py
|
|
5
|
+
skills/compliance/mica_module/skill.py
|
|
4
6
|
skills/compliance/pii_masker/__init__.py
|
|
5
7
|
skills/compliance/pii_masker/skill.py
|
|
6
8
|
skills/data_engineering/synthetic_generator/__init__.py
|
|
@@ -23,4 +25,5 @@ skillware/core/__init__.py
|
|
|
23
25
|
skillware/core/base_skill.py
|
|
24
26
|
skillware/core/env.py
|
|
25
27
|
skillware/core/loader.py
|
|
26
|
-
tests/test_loader.py
|
|
28
|
+
tests/test_loader.py
|
|
29
|
+
tests/test_mica_module.py
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from skillware.core.loader import SkillLoader
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# Fixture to load the skill module
|
|
6
|
+
@pytest.fixture
def mica_skill():
    """Provide a new MiCAModuleSkill instance resolved through the skill loader."""
    bundle = SkillLoader.load_skill("compliance/mica_module")
    skill_cls = bundle["module"].MiCAModuleSkill
    return skill_cls()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_mica_module_manifest(mica_skill):
    """The manifest advertises the expected skill name and version."""
    expected = {"name": "compliance/mica_module", "version": "0.1.0"}
    manifest = mica_skill.manifest
    for field, value in expected.items():
        assert manifest[field] == value
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_mica_module_stateless_rag_execution(mica_skill):
    """With the evaluator switched off, execute() returns the default payload."""
    outcome = mica_skill.execute(
        {
            "user_prompt": "I want to issue an asset-referenced token. What are the authorization rules?",
            "run_evaluator": False,
        }
    )

    # No evaluator ran, so the status must still be the CAUTION default.
    assert outcome["policy_status"] == "CAUTION"

    # Both retrieval keys are present whether or not any article matched.
    for key in ("retrieved_sections", "final_context_for_agent"):
        assert key in outcome

    # The placeholder feedback states that the evaluator did not run.
    holes = outcome["gemini_evaluator_feedback"]["holes_found"]
    assert "Evaluator disabled" in holes
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_mica_module_router_normalization(mica_skill):
    """A US-spelled prompt ('authorization') reaches a UK-spelled keyword ('authorisation')."""
    casp_article = {
        "title_num": "Title V",
        "article_num": "Article 59",
        "keywords": ["authorisation", "casp"],
        "content": "CASP Authorization rules...",
    }

    hits = mica_skill._route_and_fetch("Authorization of a CASP", [casp_article])

    assert len(hits) > 0
    best = hits[0]
    assert best["article_num"] == "Article 59"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_mica_module_router_deduplication(mica_skill):
    """An article is returned once, however many ways it matches or is listed."""
    article = {
        "title_num": "Title V",
        "article_num": "Article 59",
        "keywords": ["authorisation", "casp"],
        "content": "CASP Authorization rules...",
    }

    # 'authorisation' and 'casp' both match: several keyword hits on one
    # article must not produce several copies of it.
    matched = mica_skill._route_and_fetch("authorisation casp", [article])
    assert len(matched) == 1

    # The router deduplicates on (title_num, article_num), so the same article
    # appearing twice in the corpus must still be returned only once. Without
    # this case the deduplication branch is never exercised.
    matched = mica_skill._route_and_fetch(
        "authorisation casp", [article, dict(article)]
    )
    assert len(matched) == 1
    assert matched[0]["article_num"] == "Article 59"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|