cisco-ai-skill-scanner 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/METADATA +16 -1
- {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/RECORD +37 -35
- skill_scanner/_version.py +2 -2
- skill_scanner/api/api_cli.py +2 -2
- skill_scanner/api/api_server.py +1 -1
- skill_scanner/cli/cli.py +60 -2
- skill_scanner/config/yara_modes.py +314 -0
- skill_scanner/core/analyzers/llm_analyzer.py +3 -3
- skill_scanner/core/analyzers/meta_analyzer.py +50 -18
- skill_scanner/core/analyzers/static.py +177 -27
- skill_scanner/core/models.py +1 -0
- skill_scanner/core/reporters/markdown_reporter.py +9 -3
- skill_scanner/core/static_analysis/context_extractor.py +87 -13
- skill_scanner/data/prompts/code_alignment_threat_analysis_prompt.md +103 -28
- skill_scanner/data/prompts/llm_response_schema.json +3 -3
- skill_scanner/data/prompts/skill_meta_analysis_prompt.md +10 -9
- skill_scanner/data/prompts/skill_threat_analysis_prompt.md +42 -6
- skill_scanner/data/rules/signatures.yaml +141 -35
- skill_scanner/data/yara_rules/autonomy_abuse_generic.yara +66 -0
- skill_scanner/data/yara_rules/{skill_discovery_abuse.yara → capability_inflation_generic.yara} +7 -4
- skill_scanner/data/yara_rules/code_execution_generic.yara +76 -0
- skill_scanner/data/yara_rules/{coercive_injection.yara → coercive_injection_generic.yara} +2 -2
- skill_scanner/data/yara_rules/command_injection_generic.yara +77 -0
- skill_scanner/data/yara_rules/{credential_harvesting.yara → credential_harvesting_generic.yara} +25 -4
- skill_scanner/data/yara_rules/{transitive_trust_abuse.yara → indirect_prompt_injection_generic.yara} +8 -5
- skill_scanner/data/yara_rules/{prompt_injection.yara → prompt_injection_generic.yara} +2 -2
- skill_scanner/data/yara_rules/{unicode_steganography.yara → prompt_injection_unicode_steganography.yara} +23 -17
- skill_scanner/data/yara_rules/script_injection_generic.yara +82 -0
- skill_scanner/data/yara_rules/{sql_injection.yara → sql_injection_generic.yara} +22 -8
- skill_scanner/data/yara_rules/system_manipulation_generic.yara +79 -0
- skill_scanner/data/yara_rules/tool_chaining_abuse_generic.yara +72 -0
- skill_scanner/threats/__init__.py +24 -2
- skill_scanner/threats/cisco_ai_taxonomy.py +274 -0
- skill_scanner/threats/threats.py +28 -99
- skill_scanner/data/yara_rules/autonomy_abuse.yara +0 -66
- skill_scanner/data/yara_rules/code_execution.yara +0 -61
- skill_scanner/data/yara_rules/command_injection.yara +0 -54
- skill_scanner/data/yara_rules/script_injection.yara +0 -83
- skill_scanner/data/yara_rules/system_manipulation.yara +0 -65
- skill_scanner/data/yara_rules/tool_chaining_abuse.yara +0 -60
- {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/WHEEL +0 -0
- {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/entry_points.txt +0 -0
- {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -787,29 +787,42 @@ def apply_meta_analysis_to_results(
|
|
|
787
787
|
meta_result: MetaAnalysisResult,
|
|
788
788
|
skill: Skill,
|
|
789
789
|
) -> list[Finding]:
|
|
790
|
-
"""Apply meta-analysis results to
|
|
790
|
+
"""Apply meta-analysis results to enrich all findings with metadata.
|
|
791
791
|
|
|
792
792
|
This function:
|
|
793
|
-
1.
|
|
793
|
+
1. Marks false positives with metadata (but keeps them in output)
|
|
794
794
|
2. Adds meta-analysis enrichments to validated findings
|
|
795
795
|
3. Adds any new threats detected by meta-analyzer
|
|
796
796
|
|
|
797
|
+
All findings are retained in the output with metadata indicating whether
|
|
798
|
+
they were identified as false positives. This allows downstream consumers
|
|
799
|
+
(like VS Code extensions) to filter or display them as needed.
|
|
800
|
+
|
|
797
801
|
Args:
|
|
798
802
|
original_findings: Original findings from all analyzers
|
|
799
803
|
meta_result: Results from meta-analysis
|
|
800
804
|
skill: The skill being analyzed
|
|
801
805
|
|
|
802
806
|
Returns:
|
|
803
|
-
|
|
807
|
+
All findings with meta-analysis metadata added
|
|
804
808
|
"""
|
|
805
|
-
# Build
|
|
806
|
-
|
|
809
|
+
# Build false positive lookup with reasons and metadata
|
|
810
|
+
fp_data: dict[int, dict[str, Any]] = {}
|
|
807
811
|
for fp in meta_result.false_positives:
|
|
808
812
|
if "_index" in fp:
|
|
809
|
-
|
|
813
|
+
fp_data[fp["_index"]] = {
|
|
814
|
+
"reason": fp.get("reason") or fp.get("false_positive_reason") or "Identified as likely false positive",
|
|
815
|
+
"confidence": fp.get("confidence"),
|
|
816
|
+
}
|
|
810
817
|
|
|
811
818
|
# Build enrichment lookup from validated findings
|
|
812
|
-
enrichments = {}
|
|
819
|
+
enrichments: dict[int, dict[str, Any]] = {}
|
|
820
|
+
priority_lookup: dict[int, int] = {}
|
|
821
|
+
|
|
822
|
+
# Build priority rank lookup from priority_order
|
|
823
|
+
for rank, idx in enumerate(meta_result.priority_order, start=1):
|
|
824
|
+
priority_lookup[idx] = rank
|
|
825
|
+
|
|
813
826
|
for vf in meta_result.validated_findings:
|
|
814
827
|
idx = vf.get("_index")
|
|
815
828
|
if idx is not None:
|
|
@@ -821,25 +834,44 @@ def apply_meta_analysis_to_results(
|
|
|
821
834
|
"meta_impact": vf.get("impact"),
|
|
822
835
|
}
|
|
823
836
|
|
|
824
|
-
#
|
|
837
|
+
# Enrich all findings (do not filter out false positives)
|
|
825
838
|
result_findings = []
|
|
826
839
|
for i, finding in enumerate(original_findings):
|
|
827
|
-
#
|
|
828
|
-
if
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
#
|
|
832
|
-
if i in
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
840
|
+
# Ensure metadata dict exists
|
|
841
|
+
if finding.metadata is None:
|
|
842
|
+
finding.metadata = {}
|
|
843
|
+
|
|
844
|
+
# Mark false positives with metadata (but keep them in output)
|
|
845
|
+
if i in fp_data:
|
|
846
|
+
finding.metadata["meta_false_positive"] = True
|
|
847
|
+
finding.metadata["meta_reason"] = fp_data[i]["reason"]
|
|
848
|
+
if fp_data[i].get("confidence") is not None:
|
|
849
|
+
finding.metadata["meta_confidence"] = fp_data[i]["confidence"]
|
|
836
850
|
else:
|
|
837
|
-
|
|
851
|
+
# Mark as validated (not a false positive)
|
|
852
|
+
finding.metadata["meta_false_positive"] = False
|
|
853
|
+
|
|
854
|
+
# Add enrichments if available for validated findings
|
|
855
|
+
if i in enrichments:
|
|
856
|
+
for key, value in enrichments[i].items():
|
|
857
|
+
if value is not None:
|
|
858
|
+
finding.metadata[key] = value
|
|
859
|
+
else:
|
|
860
|
+
finding.metadata["meta_reviewed"] = True
|
|
861
|
+
|
|
862
|
+
# Add priority rank if available
|
|
863
|
+
if i in priority_lookup:
|
|
864
|
+
finding.metadata["meta_priority"] = priority_lookup[i]
|
|
838
865
|
|
|
839
866
|
result_findings.append(finding)
|
|
840
867
|
|
|
841
868
|
# Add missed threats as new findings
|
|
842
869
|
missed_findings = meta_result.get_missed_threats(skill)
|
|
870
|
+
for mf in missed_findings:
|
|
871
|
+
# Ensure missed threats are marked as validated (not false positives)
|
|
872
|
+
if mf.metadata is None:
|
|
873
|
+
mf.metadata = {}
|
|
874
|
+
mf.metadata["meta_false_positive"] = False
|
|
843
875
|
result_findings.extend(missed_findings)
|
|
844
876
|
|
|
845
877
|
return result_findings
|
|
@@ -24,6 +24,7 @@ import re
|
|
|
24
24
|
from pathlib import Path
|
|
25
25
|
from typing import Any
|
|
26
26
|
|
|
27
|
+
from ...config.yara_modes import DEFAULT_YARA_MODE, YaraModeConfig
|
|
27
28
|
from ...core.models import Finding, Severity, Skill, ThreatCategory
|
|
28
29
|
from ...core.rules.patterns import RuleLoader, SecurityRule
|
|
29
30
|
from ...core.rules.yara_scanner import YaraScanner
|
|
@@ -91,28 +92,93 @@ _RM_TARGET_PATTERN = re.compile(r"rm\s+-r[^;]*?\s+([^\s;]+)")
|
|
|
91
92
|
class StaticAnalyzer(BaseAnalyzer):
|
|
92
93
|
"""Static pattern-based security analyzer."""
|
|
93
94
|
|
|
94
|
-
def __init__(
|
|
95
|
+
def __init__(
|
|
96
|
+
self,
|
|
97
|
+
rules_file: Path | None = None,
|
|
98
|
+
use_yara: bool = True,
|
|
99
|
+
yara_mode: YaraModeConfig | str | None = None,
|
|
100
|
+
custom_yara_rules_path: str | Path | None = None,
|
|
101
|
+
disabled_rules: set[str] | None = None,
|
|
102
|
+
):
|
|
95
103
|
"""
|
|
96
104
|
Initialize static analyzer.
|
|
97
105
|
|
|
98
106
|
Args:
|
|
99
|
-
rules_file: Optional custom rules file
|
|
107
|
+
rules_file: Optional custom YAML rules file
|
|
100
108
|
use_yara: Whether to use YARA scanning (default: True)
|
|
109
|
+
yara_mode: YARA detection mode - can be:
|
|
110
|
+
- YaraModeConfig instance
|
|
111
|
+
- Mode name string: "strict", "balanced", "permissive"
|
|
112
|
+
- None for default (balanced)
|
|
113
|
+
custom_yara_rules_path: Path to directory containing custom YARA rules
|
|
114
|
+
(.yara files). If provided, uses these instead of built-in rules.
|
|
115
|
+
disabled_rules: Set of rule names to disable. Rules can be YARA rule
|
|
116
|
+
names (e.g., "YARA_script_injection") or static rule IDs
|
|
117
|
+
(e.g., "COMMAND_INJECTION_EVAL").
|
|
101
118
|
"""
|
|
102
119
|
super().__init__("static_analyzer")
|
|
103
120
|
|
|
104
121
|
self.rule_loader = RuleLoader(rules_file)
|
|
105
122
|
self.rule_loader.load_rules()
|
|
106
123
|
|
|
124
|
+
# Configure YARA mode
|
|
125
|
+
if yara_mode is None:
|
|
126
|
+
self.yara_mode = DEFAULT_YARA_MODE
|
|
127
|
+
elif isinstance(yara_mode, str):
|
|
128
|
+
self.yara_mode = YaraModeConfig.from_mode_name(yara_mode)
|
|
129
|
+
else:
|
|
130
|
+
self.yara_mode = yara_mode
|
|
131
|
+
|
|
132
|
+
# Store disabled rules (merge with mode-based disabled rules)
|
|
133
|
+
self.disabled_rules = set(disabled_rules or set())
|
|
134
|
+
self.disabled_rules.update(self.yara_mode.disabled_rules)
|
|
135
|
+
|
|
136
|
+
# Store custom YARA rules path
|
|
137
|
+
self.custom_yara_rules_path = Path(custom_yara_rules_path) if custom_yara_rules_path else None
|
|
138
|
+
|
|
107
139
|
self.use_yara = use_yara
|
|
108
140
|
self.yara_scanner = None
|
|
109
141
|
if use_yara:
|
|
110
142
|
try:
|
|
111
|
-
|
|
143
|
+
# Use custom rules path if provided
|
|
144
|
+
if self.custom_yara_rules_path:
|
|
145
|
+
self.yara_scanner = YaraScanner(rules_dir=self.custom_yara_rules_path)
|
|
146
|
+
logger.info("Using custom YARA rules from: %s", self.custom_yara_rules_path)
|
|
147
|
+
else:
|
|
148
|
+
self.yara_scanner = YaraScanner()
|
|
112
149
|
except Exception as e:
|
|
113
150
|
logger.warning("Could not load YARA scanner: %s", e)
|
|
114
151
|
self.yara_scanner = None
|
|
115
152
|
|
|
153
|
+
def _is_rule_enabled(self, rule_name: str) -> bool:
|
|
154
|
+
"""
|
|
155
|
+
Check if a rule is enabled.
|
|
156
|
+
|
|
157
|
+
A rule is enabled if:
|
|
158
|
+
1. It's enabled in the current YARA mode
|
|
159
|
+
2. It's not in the explicitly disabled rules set
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
rule_name: Name of the rule to check (e.g., "YARA_script_injection")
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
True if the rule is enabled, False otherwise
|
|
166
|
+
"""
|
|
167
|
+
# Check mode-based enable/disable first
|
|
168
|
+
if not self.yara_mode.is_rule_enabled(rule_name):
|
|
169
|
+
return False
|
|
170
|
+
|
|
171
|
+
# Check if explicitly disabled via --disable-rule
|
|
172
|
+
if rule_name in self.disabled_rules:
|
|
173
|
+
return False
|
|
174
|
+
|
|
175
|
+
# Also check without YARA_ prefix for convenience
|
|
176
|
+
base_name = rule_name.replace("YARA_", "") if rule_name.startswith("YARA_") else rule_name
|
|
177
|
+
if base_name in self.disabled_rules:
|
|
178
|
+
return False
|
|
179
|
+
|
|
180
|
+
return True
|
|
181
|
+
|
|
116
182
|
def analyze(self, skill: Skill) -> list[Finding]:
|
|
117
183
|
"""
|
|
118
184
|
Analyze skill using static pattern matching.
|
|
@@ -144,6 +210,10 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
144
210
|
|
|
145
211
|
findings.extend(self._scan_asset_files(skill))
|
|
146
212
|
|
|
213
|
+
# Filter out disabled rules
|
|
214
|
+
if self.disabled_rules:
|
|
215
|
+
findings = [f for f in findings if self._is_rule_enabled(f.rule_id)]
|
|
216
|
+
|
|
147
217
|
return findings
|
|
148
218
|
|
|
149
219
|
def _check_manifest(self, skill: Skill) -> list[Finding]:
|
|
@@ -157,7 +227,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
157
227
|
id=self._generate_finding_id("MANIFEST_INVALID_NAME", "manifest"),
|
|
158
228
|
rule_id="MANIFEST_INVALID_NAME",
|
|
159
229
|
category=ThreatCategory.POLICY_VIOLATION,
|
|
160
|
-
severity=Severity.
|
|
230
|
+
severity=Severity.INFO,
|
|
161
231
|
title="Skill name does not follow agent skills naming rules",
|
|
162
232
|
description=(
|
|
163
233
|
f"Skill name '{manifest.name}' is invalid. Agent skills require lowercase letters, numbers, "
|
|
@@ -246,7 +316,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
246
316
|
id=self._generate_finding_id("MANIFEST_MISSING_LICENSE", "manifest"),
|
|
247
317
|
rule_id="MANIFEST_MISSING_LICENSE",
|
|
248
318
|
category=ThreatCategory.POLICY_VIOLATION,
|
|
249
|
-
severity=Severity.
|
|
319
|
+
severity=Severity.INFO,
|
|
250
320
|
title="Skill does not specify a license",
|
|
251
321
|
description="Skill manifest does not include a 'license' field. Specifying a license helps users understand usage terms.",
|
|
252
322
|
file_path="SKILL.md",
|
|
@@ -553,7 +623,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
553
623
|
]
|
|
554
624
|
|
|
555
625
|
socket_external_indicators = ["socket.connect", "socket.create_connection"]
|
|
556
|
-
socket_localhost_indicators = ["localhost", "127.0.0.1", "
|
|
626
|
+
socket_localhost_indicators = ["localhost", "127.0.0.1", "::1"]
|
|
557
627
|
|
|
558
628
|
for skill_file in skill.get_scripts():
|
|
559
629
|
content = skill_file.read_content()
|
|
@@ -622,8 +692,8 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
622
692
|
Finding(
|
|
623
693
|
id=self._generate_finding_id("ALLOWED_TOOLS_WRITE_VIOLATION", skill.name),
|
|
624
694
|
rule_id="ALLOWED_TOOLS_WRITE_VIOLATION",
|
|
625
|
-
category=ThreatCategory.
|
|
626
|
-
severity=Severity.
|
|
695
|
+
category=ThreatCategory.POLICY_VIOLATION,
|
|
696
|
+
severity=Severity.MEDIUM,
|
|
627
697
|
title="Skill declares no Write tool but bundled scripts write files",
|
|
628
698
|
description=(
|
|
629
699
|
f"Skill restricts tools to {skill.manifest.allowed_tools} but bundled scripts appear to "
|
|
@@ -651,22 +721,11 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
651
721
|
)
|
|
652
722
|
)
|
|
653
723
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
id=self._generate_finding_id("ALLOWED_TOOLS_PYTHON_VIOLATION", skill.name),
|
|
660
|
-
rule_id="ALLOWED_TOOLS_PYTHON_VIOLATION",
|
|
661
|
-
category=ThreatCategory.UNAUTHORIZED_TOOL_USE,
|
|
662
|
-
severity=Severity.HIGH,
|
|
663
|
-
title="Python scripts present but Python tool not in allowed-tools",
|
|
664
|
-
description=f"Skill restricts tools to {skill.manifest.allowed_tools} but includes Python scripts",
|
|
665
|
-
file_path=None,
|
|
666
|
-
remediation="Add 'Python' to allowed-tools or remove Python scripts",
|
|
667
|
-
analyzer="static",
|
|
668
|
-
)
|
|
669
|
-
)
|
|
724
|
+
# Note: ALLOWED_TOOLS_PYTHON_VIOLATION removed - too many false positives
|
|
725
|
+
# Many skills include Python helper scripts that are NOT invoked directly by the agent
|
|
726
|
+
# (e.g., build scripts, test files, utilities). The allowed-tools list controls what
|
|
727
|
+
# the AGENT can use, not what helper scripts exist in the repo.
|
|
728
|
+
# If direct Python execution is a concern, COMMAND_INJECTION_EVAL catches actual risks.
|
|
670
729
|
|
|
671
730
|
if "grep" not in allowed_tools_lower:
|
|
672
731
|
if self._code_uses_grep(skill):
|
|
@@ -927,6 +986,10 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
927
986
|
|
|
928
987
|
yara_matches = self.yara_scanner.scan_content(skill.instruction_body, "SKILL.md")
|
|
929
988
|
for match in yara_matches:
|
|
989
|
+
rule_name = match.get("rule_name", "")
|
|
990
|
+
# Check if rule is enabled in current mode and not explicitly disabled
|
|
991
|
+
if not self._is_rule_enabled(rule_name):
|
|
992
|
+
continue
|
|
930
993
|
findings.extend(self._create_findings_from_yara_match(match, skill))
|
|
931
994
|
|
|
932
995
|
for skill_file in skill.get_scripts():
|
|
@@ -935,7 +998,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
935
998
|
yara_matches = self.yara_scanner.scan_content(content, skill_file.relative_path)
|
|
936
999
|
for match in yara_matches:
|
|
937
1000
|
rule_name = match.get("rule_name", "")
|
|
938
|
-
if rule_name == "
|
|
1001
|
+
if rule_name == "capability_inflation_generic":
|
|
939
1002
|
continue
|
|
940
1003
|
findings.extend(self._create_findings_from_yara_match(match, skill, content))
|
|
941
1004
|
|
|
@@ -1006,8 +1069,28 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
1006
1069
|
".cache",
|
|
1007
1070
|
}
|
|
1008
1071
|
|
|
1072
|
+
PLACEHOLDER_MARKERS = {
|
|
1073
|
+
"your-",
|
|
1074
|
+
"your_",
|
|
1075
|
+
"your ",
|
|
1076
|
+
"example",
|
|
1077
|
+
"sample",
|
|
1078
|
+
"dummy",
|
|
1079
|
+
"placeholder",
|
|
1080
|
+
"replace",
|
|
1081
|
+
"changeme",
|
|
1082
|
+
"change_me",
|
|
1083
|
+
"<your",
|
|
1084
|
+
"<insert",
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1009
1087
|
for string_match in match["strings"]:
|
|
1010
|
-
|
|
1088
|
+
# Skip exclusion patterns (these are used in YARA conditions but shouldn't create findings)
|
|
1089
|
+
string_identifier = string_match.get("identifier", "")
|
|
1090
|
+
if string_identifier.startswith("$documentation") or string_identifier.startswith("$safe"):
|
|
1091
|
+
continue
|
|
1092
|
+
|
|
1093
|
+
if rule_name == "code_execution_generic":
|
|
1011
1094
|
line_content = string_match.get("line_content", "").lower()
|
|
1012
1095
|
matched_data = string_match.get("matched_data", "").lower()
|
|
1013
1096
|
|
|
@@ -1028,7 +1111,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
1028
1111
|
if is_safe_command:
|
|
1029
1112
|
continue
|
|
1030
1113
|
|
|
1031
|
-
if rule_name == "
|
|
1114
|
+
if rule_name == "system_manipulation_generic":
|
|
1032
1115
|
line_content = string_match.get("line_content", "").lower()
|
|
1033
1116
|
|
|
1034
1117
|
if "rm -rf" in line_content or "rm -r" in line_content:
|
|
@@ -1040,6 +1123,73 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
1040
1123
|
if all_safe:
|
|
1041
1124
|
continue
|
|
1042
1125
|
|
|
1126
|
+
# Credential harvesting post-filters (controlled by mode)
|
|
1127
|
+
if rule_name == "credential_harvesting_generic":
|
|
1128
|
+
if self.yara_mode.credential_harvesting.filter_placeholder_patterns:
|
|
1129
|
+
line_content = string_match.get("line_content", "")
|
|
1130
|
+
matched_data = string_match.get("matched_data", "")
|
|
1131
|
+
combined = f"{line_content} {matched_data}".lower()
|
|
1132
|
+
|
|
1133
|
+
if any(marker in combined for marker in PLACEHOLDER_MARKERS):
|
|
1134
|
+
continue
|
|
1135
|
+
|
|
1136
|
+
if "export " in combined and "=" in combined:
|
|
1137
|
+
_, value = combined.split("=", 1)
|
|
1138
|
+
if any(marker in value for marker in PLACEHOLDER_MARKERS):
|
|
1139
|
+
continue
|
|
1140
|
+
|
|
1141
|
+
# Tool chaining post-filters (controlled by mode)
|
|
1142
|
+
if rule_name == "tool_chaining_abuse_generic":
|
|
1143
|
+
line_content = string_match.get("line_content", "")
|
|
1144
|
+
lower_line = line_content.lower()
|
|
1145
|
+
exfil_hints = ("send", "upload", "transmit", "webhook", "slack", "exfil", "forward")
|
|
1146
|
+
|
|
1147
|
+
if self.yara_mode.tool_chaining.filter_generic_http_verbs:
|
|
1148
|
+
if (
|
|
1149
|
+
"get" in lower_line
|
|
1150
|
+
and "post" in lower_line
|
|
1151
|
+
and not any(hint in lower_line for hint in exfil_hints)
|
|
1152
|
+
):
|
|
1153
|
+
continue
|
|
1154
|
+
|
|
1155
|
+
if self.yara_mode.tool_chaining.filter_api_documentation:
|
|
1156
|
+
if any(
|
|
1157
|
+
token in line_content for token in ("@app.", "app.", "router.", "route", "endpoint")
|
|
1158
|
+
) and not any(hint in lower_line for hint in exfil_hints):
|
|
1159
|
+
continue
|
|
1160
|
+
|
|
1161
|
+
if self.yara_mode.tool_chaining.filter_email_field_mentions:
|
|
1162
|
+
if "by email" in lower_line or "email address" in lower_line or "email field" in lower_line:
|
|
1163
|
+
continue
|
|
1164
|
+
|
|
1165
|
+
# Unicode steganography post-filters
|
|
1166
|
+
if rule_name == "prompt_injection_unicode_steganography":
|
|
1167
|
+
line_content = string_match.get("line_content", "")
|
|
1168
|
+
matched_data = string_match.get("matched_data", "")
|
|
1169
|
+
has_ascii_letters = any("A" <= char <= "Z" or "a" <= char <= "z" for char in line_content)
|
|
1170
|
+
|
|
1171
|
+
# Filter short matches in non-Latin context (likely legitimate i18n)
|
|
1172
|
+
if len(matched_data) <= 2 and not has_ascii_letters:
|
|
1173
|
+
continue
|
|
1174
|
+
|
|
1175
|
+
# Filter if context suggests legitimate internationalization
|
|
1176
|
+
i18n_markers = ("i18n", "locale", "translation", "lang=", "charset", "utf-8", "encoding")
|
|
1177
|
+
if any(marker in line_content.lower() for marker in i18n_markers):
|
|
1178
|
+
continue
|
|
1179
|
+
|
|
1180
|
+
# Filter Cyrillic, CJK, Arabic, Hebrew text (legitimate non-Latin content)
|
|
1181
|
+
# These are detected by the presence of characters from those Unicode ranges in the line
|
|
1182
|
+
cyrillic_cjk_pattern = any(
|
|
1183
|
+
("\u0400" <= char <= "\u04ff") # Cyrillic
|
|
1184
|
+
or ("\u4e00" <= char <= "\u9fff") # CJK Unified
|
|
1185
|
+
or ("\u0600" <= char <= "\u06ff") # Arabic
|
|
1186
|
+
or ("\u0590" <= char <= "\u05ff") # Hebrew
|
|
1187
|
+
for char in line_content
|
|
1188
|
+
)
|
|
1189
|
+
# If the line has legitimate non-Latin text and the match is short (fewer than 10 characters), skip
|
|
1190
|
+
if cyrillic_cjk_pattern and len(matched_data) < 10:
|
|
1191
|
+
continue
|
|
1192
|
+
|
|
1043
1193
|
finding_id = self._generate_finding_id(f"YARA_{rule_name}", f"{file_path}:{string_match['line_number']}")
|
|
1044
1194
|
|
|
1045
1195
|
description = meta.get("description", f"YARA rule {rule_name} matched")
|
skill_scanner/core/models.py
CHANGED
|
@@ -49,6 +49,7 @@ class ThreatCategory(str, Enum):
|
|
|
49
49
|
RESOURCE_ABUSE = "resource_abuse"
|
|
50
50
|
POLICY_VIOLATION = "policy_violation"
|
|
51
51
|
MALWARE = "malware"
|
|
52
|
+
HARMFUL_CONTENT = "harmful_content"
|
|
52
53
|
# New threat categories
|
|
53
54
|
SKILL_DISCOVERY_ABUSE = "skill_discovery_abuse"
|
|
54
55
|
TRANSITIVE_TRUST_ABUSE = "transitive_trust_abuse"
|
|
@@ -18,6 +18,8 @@
|
|
|
18
18
|
Markdown format reporter for scan results.
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
+
import re
|
|
22
|
+
|
|
21
23
|
from ...core.models import Finding, Report, ScanResult, Severity
|
|
22
24
|
|
|
23
25
|
|
|
@@ -137,6 +139,7 @@ class MarkdownReporter:
|
|
|
137
139
|
lines.append("")
|
|
138
140
|
|
|
139
141
|
for result in report.scan_results:
|
|
142
|
+
lines.append("\n---\n")
|
|
140
143
|
status_icon = "[OK]" if result.is_safe else "[FAIL]"
|
|
141
144
|
lines.append(f"### {status_icon} {result.skill_name}")
|
|
142
145
|
lines.append("")
|
|
@@ -186,9 +189,12 @@ class MarkdownReporter:
|
|
|
186
189
|
if finding.snippet:
|
|
187
190
|
lines.append(f"{indent_str}")
|
|
188
191
|
lines.append(f"{indent_str}**Code Snippet:**")
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
+
if not re.search(r"```", finding.snippet):
|
|
193
|
+
lines.append(f"{indent_str}```")
|
|
194
|
+
for line in finding.snippet.splitlines():
|
|
195
|
+
lines.append(f"{indent_str}{line}")
|
|
196
|
+
if not re.search(r"```", finding.snippet):
|
|
197
|
+
lines.append(f"{indent_str}```")
|
|
192
198
|
|
|
193
199
|
if finding.remediation:
|
|
194
200
|
lines.append(f"{indent_str}")
|
|
@@ -141,18 +141,68 @@ class SkillFunctionContext:
|
|
|
141
141
|
class ContextExtractor:
|
|
142
142
|
"""Extract comprehensive security context from skill scripts."""
|
|
143
143
|
|
|
144
|
-
|
|
144
|
+
# ONLY flag URLs to explicitly suspicious domains - not all unknown URLs
|
|
145
|
+
# Reference: https://lots-project.com/ (Living Off Trusted Sites)
|
|
146
|
+
SUSPICIOUS_DOMAINS = [
|
|
147
|
+
# Known exfil/C2/paste services (LOTS: Download, Exfiltration, C&C)
|
|
148
|
+
"pastebin.com",
|
|
149
|
+
"hastebin.com",
|
|
150
|
+
"paste.ee",
|
|
151
|
+
"rentry.co",
|
|
152
|
+
"zerobin.net",
|
|
153
|
+
"textbin.net",
|
|
154
|
+
"termbin.com",
|
|
155
|
+
"sprunge.us",
|
|
156
|
+
"clbin.com",
|
|
157
|
+
"ix.io",
|
|
158
|
+
"pastetext.net",
|
|
159
|
+
"pastie.org",
|
|
160
|
+
"ideone.com",
|
|
161
|
+
# File sharing services (LOTS: Download, Exfiltration)
|
|
162
|
+
"transfer.sh",
|
|
163
|
+
"filebin.net",
|
|
164
|
+
"gofile.io",
|
|
165
|
+
"anonfiles.com",
|
|
166
|
+
"mediafire.com",
|
|
167
|
+
"mega.nz",
|
|
168
|
+
"wetransfer.com",
|
|
169
|
+
"filetransfer.io",
|
|
170
|
+
"ufile.io",
|
|
171
|
+
"4sync.com",
|
|
172
|
+
"uplooder.net",
|
|
173
|
+
"filecloudonline.com",
|
|
174
|
+
"sendspace.com",
|
|
175
|
+
"siasky.net",
|
|
176
|
+
# Tunneling/webhook services (LOTS: C&C, Exfiltration)
|
|
177
|
+
"webhook.site",
|
|
178
|
+
"requestbin",
|
|
179
|
+
"ngrok.io",
|
|
180
|
+
"pipedream.net",
|
|
181
|
+
"localhost.run",
|
|
182
|
+
"trycloudflare.com",
|
|
183
|
+
# Code execution services (LOTS: C&C, Download)
|
|
184
|
+
"codepen.io",
|
|
185
|
+
"repl.co",
|
|
186
|
+
"glitch.me",
|
|
187
|
+
# Explicitly malicious example domains
|
|
188
|
+
"attacker.example.com",
|
|
189
|
+
"evil.example.com",
|
|
190
|
+
"malicious.com",
|
|
191
|
+
"c2-server.com",
|
|
192
|
+
]
|
|
145
193
|
|
|
146
|
-
#
|
|
194
|
+
# Domains that are always safe (not flagged even if matched by SUSPICIOUS_DOMAINS pattern)
|
|
195
|
+
# NOTE: We intentionally exclude file-hosting/messaging services that appear in LOTS
|
|
196
|
+
# (https://lots-project.com/) with Download/C&C capabilities, even if commonly used.
|
|
147
197
|
LEGITIMATE_DOMAINS = [
|
|
148
|
-
# AI provider services
|
|
198
|
+
# AI provider services (API endpoints only, not user content)
|
|
149
199
|
"api.anthropic.com",
|
|
150
200
|
"statsig.anthropic.com",
|
|
151
|
-
|
|
152
|
-
"
|
|
153
|
-
"
|
|
154
|
-
"
|
|
155
|
-
# Package registries
|
|
201
|
+
"api.openai.com",
|
|
202
|
+
"api.together.xyz",
|
|
203
|
+
"api.cohere.ai",
|
|
204
|
+
"generativelanguage.googleapis.com",
|
|
205
|
+
# Package registries (read-only, no user-uploaded executables)
|
|
156
206
|
"registry.npmjs.org",
|
|
157
207
|
"npmjs.com",
|
|
158
208
|
"npmjs.org",
|
|
@@ -161,18 +211,43 @@ class ContextExtractor:
|
|
|
161
211
|
"pypi.org",
|
|
162
212
|
"files.pythonhosted.org",
|
|
163
213
|
"pythonhosted.org",
|
|
214
|
+
"crates.io",
|
|
215
|
+
"rubygems.org",
|
|
216
|
+
"pkg.go.dev",
|
|
164
217
|
# System packages
|
|
165
218
|
"archive.ubuntu.com",
|
|
166
219
|
"security.ubuntu.com",
|
|
220
|
+
"debian.org",
|
|
167
221
|
# XML schemas (for OOXML document processing)
|
|
168
222
|
"schemas.microsoft.com",
|
|
169
223
|
"schemas.openxmlformats.org",
|
|
170
224
|
"www.w3.org",
|
|
171
225
|
"purl.org",
|
|
226
|
+
"json-schema.org",
|
|
172
227
|
# Localhost and development
|
|
173
228
|
"localhost",
|
|
174
229
|
"127.0.0.1",
|
|
175
230
|
"0.0.0.0",
|
|
231
|
+
"::1",
|
|
232
|
+
# Common safe services (API-focused, not file hosting)
|
|
233
|
+
"stripe.com",
|
|
234
|
+
"zoom.us",
|
|
235
|
+
"twilio.com",
|
|
236
|
+
"mailgun.com",
|
|
237
|
+
"sentry.io",
|
|
238
|
+
"datadog.com",
|
|
239
|
+
"newrelic.com",
|
|
240
|
+
"elastic.co",
|
|
241
|
+
"mongodb.com",
|
|
242
|
+
"redis.io",
|
|
243
|
+
"postgresql.org",
|
|
244
|
+
# NOTE: The following are intentionally NOT in this list due to LOTS risk:
|
|
245
|
+
# - github.com, gitlab.com, bitbucket.org (Download, C&C)
|
|
246
|
+
# - raw.githubusercontent.com (Download, C&C)
|
|
247
|
+
# - discord.com, telegram.org, slack.com (C&C, Exfil)
|
|
248
|
+
# - amazonaws.com, googleapis.com, azure.com, cloudflare.com (wildcard hosting)
|
|
249
|
+
# - google.com, microsoft.com (too broad, includes file hosting)
|
|
250
|
+
# - sendgrid.com (email tracking/download)
|
|
176
251
|
]
|
|
177
252
|
|
|
178
253
|
def extract_context(self, file_path: Path, source_code: str) -> SkillScriptContext:
|
|
@@ -259,7 +334,8 @@ class ContextExtractor:
|
|
|
259
334
|
# Also collect module-level strings (class attributes, etc.)
|
|
260
335
|
all_strings.extend(parser.module_strings)
|
|
261
336
|
|
|
262
|
-
# Find suspicious URLs
|
|
337
|
+
# Find suspicious URLs - ONLY flag URLs to known-bad destinations
|
|
338
|
+
# Don't flag unknown URLs - that creates too many false positives
|
|
263
339
|
suspicious_urls = []
|
|
264
340
|
for s in all_strings:
|
|
265
341
|
# Skip if not URL-like or contains newlines (docstrings)
|
|
@@ -271,12 +347,10 @@ class ContextExtractor:
|
|
|
271
347
|
# Skip if contains legitimate domain
|
|
272
348
|
if any(domain in s for domain in self.LEGITIMATE_DOMAINS):
|
|
273
349
|
continue
|
|
274
|
-
#
|
|
350
|
+
# ONLY flag if URL contains a known suspicious domain
|
|
351
|
+
# Don't flag all unknown URLs - that's too aggressive
|
|
275
352
|
if any(domain in s for domain in self.SUSPICIOUS_DOMAINS):
|
|
276
353
|
suspicious_urls.append(s)
|
|
277
|
-
# Generic URLs only if they look suspicious (not just schema URLs)
|
|
278
|
-
elif not any(schema in s for schema in ["schemas.", "www.w3.org", "xmlns"]):
|
|
279
|
-
suspicious_urls.append(s)
|
|
280
354
|
|
|
281
355
|
# Create context
|
|
282
356
|
context = SkillScriptContext(
|