cisco-ai-skill-scanner 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cisco_ai_skill_scanner-1.0.0.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/METADATA +28 -13
- cisco_ai_skill_scanner-1.0.2.dist-info/RECORD +102 -0
- cisco_ai_skill_scanner-1.0.2.dist-info/entry_points.txt +4 -0
- {skillanalyzer → skill_scanner}/__init__.py +8 -4
- {skillanalyzer → skill_scanner}/_version.py +2 -2
- {skillanalyzer → skill_scanner}/api/__init__.py +1 -1
- {skillanalyzer → skill_scanner}/api/api.py +4 -4
- {skillanalyzer → skill_scanner}/api/api_cli.py +8 -8
- {skillanalyzer → skill_scanner}/api/api_server.py +7 -7
- {skillanalyzer → skill_scanner}/api/router.py +3 -3
- {skillanalyzer → skill_scanner}/cli/__init__.py +1 -1
- {skillanalyzer → skill_scanner}/cli/cli.py +71 -13
- {skillanalyzer → skill_scanner}/config/__init__.py +3 -3
- {skillanalyzer → skill_scanner}/config/config.py +2 -2
- {skillanalyzer → skill_scanner}/config/config_parser.py +9 -9
- {skillanalyzer → skill_scanner}/config/constants.py +2 -2
- skill_scanner/config/yara_modes.py +314 -0
- {skillanalyzer → skill_scanner}/core/__init__.py +1 -1
- {skillanalyzer → skill_scanner}/core/analyzers/__init__.py +3 -3
- {skillanalyzer → skill_scanner}/core/analyzers/aidefense_analyzer.py +3 -3
- {skillanalyzer → skill_scanner}/core/analyzers/behavioral/__init__.py +1 -1
- {skillanalyzer → skill_scanner}/core/analyzers/behavioral/alignment/alignment_llm_client.py +1 -1
- {skillanalyzer → skill_scanner}/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +2 -2
- {skillanalyzer → skill_scanner}/core/analyzers/behavioral_analyzer.py +1 -1
- skillanalyzer/core/analyzers/cross_skill_analyzer.py → skill_scanner/core/analyzers/cross_skill_scanner.py +5 -5
- {skillanalyzer → skill_scanner}/core/analyzers/llm_analyzer.py +4 -4
- {skillanalyzer → skill_scanner}/core/analyzers/llm_prompt_builder.py +2 -2
- {skillanalyzer → skill_scanner}/core/analyzers/meta_analyzer.py +52 -20
- {skillanalyzer → skill_scanner}/core/analyzers/static.py +185 -35
- {skillanalyzer → skill_scanner}/core/analyzers/trigger_analyzer.py +2 -2
- {skillanalyzer → skill_scanner}/core/exceptions.py +10 -10
- {skillanalyzer → skill_scanner}/core/loader.py +4 -4
- {skillanalyzer → skill_scanner}/core/models.py +7 -6
- {skillanalyzer → skill_scanner}/core/reporters/markdown_reporter.py +11 -5
- {skillanalyzer → skill_scanner}/core/reporters/sarif_reporter.py +2 -2
- {skillanalyzer → skill_scanner}/core/reporters/table_reporter.py +2 -2
- {skillanalyzer → skill_scanner}/core/rules/yara_scanner.py +1 -1
- {skillanalyzer → skill_scanner}/core/scanner.py +2 -2
- {skillanalyzer → skill_scanner}/core/static_analysis/context_extractor.py +88 -14
- {skillanalyzer → skill_scanner}/core/static_analysis/dataflow/__init__.py +1 -1
- {skillanalyzer → skill_scanner}/core/static_analysis/interprocedural/call_graph_analyzer.py +2 -2
- {skillanalyzer → skill_scanner}/core/static_analysis/parser/python_parser.py +5 -5
- {skillanalyzer → skill_scanner}/data/__init__.py +1 -1
- {skillanalyzer → skill_scanner}/data/prompts/boilerplate_protection_rule_prompt.md +5 -5
- {skillanalyzer → skill_scanner}/data/prompts/code_alignment_threat_analysis_prompt.md +128 -53
- {skillanalyzer → skill_scanner}/data/prompts/llm_response_schema.json +3 -3
- {skillanalyzer → skill_scanner}/data/prompts/skill_meta_analysis_prompt.md +16 -15
- {skillanalyzer → skill_scanner}/data/prompts/skill_threat_analysis_prompt.md +53 -17
- {skillanalyzer → skill_scanner}/data/prompts/unified_response_schema.md +1 -1
- {skillanalyzer → skill_scanner}/data/rules/signatures.yaml +143 -37
- skill_scanner/data/yara_rules/autonomy_abuse_generic.yara +66 -0
- skillanalyzer/data/yara_rules/skill_discovery_abuse.yara → skill_scanner/data/yara_rules/capability_inflation_generic.yara +7 -4
- skill_scanner/data/yara_rules/code_execution_generic.yara +76 -0
- skillanalyzer/data/yara_rules/coercive_injection.yara → skill_scanner/data/yara_rules/coercive_injection_generic.yara +2 -2
- skill_scanner/data/yara_rules/command_injection_generic.yara +77 -0
- skillanalyzer/data/yara_rules/credential_harvesting.yara → skill_scanner/data/yara_rules/credential_harvesting_generic.yara +25 -4
- skillanalyzer/data/yara_rules/transitive_trust_abuse.yara → skill_scanner/data/yara_rules/indirect_prompt_injection_generic.yara +8 -5
- skillanalyzer/data/yara_rules/prompt_injection.yara → skill_scanner/data/yara_rules/prompt_injection_generic.yara +2 -2
- skillanalyzer/data/yara_rules/unicode_steganography.yara → skill_scanner/data/yara_rules/prompt_injection_unicode_steganography.yara +23 -17
- skill_scanner/data/yara_rules/script_injection_generic.yara +82 -0
- skillanalyzer/data/yara_rules/sql_injection.yara → skill_scanner/data/yara_rules/sql_injection_generic.yara +22 -8
- skill_scanner/data/yara_rules/system_manipulation_generic.yara +79 -0
- skill_scanner/data/yara_rules/tool_chaining_abuse_generic.yara +72 -0
- {skillanalyzer → skill_scanner}/hooks/__init__.py +1 -1
- {skillanalyzer → skill_scanner}/hooks/pre_commit.py +16 -16
- {skillanalyzer → skill_scanner}/threats/__init__.py +25 -3
- skill_scanner/threats/cisco_ai_taxonomy.py +274 -0
- {skillanalyzer → skill_scanner}/threats/threats.py +28 -99
- {skillanalyzer → skill_scanner}/utils/__init__.py +1 -1
- {skillanalyzer → skill_scanner}/utils/command_utils.py +1 -1
- {skillanalyzer → skill_scanner}/utils/di_container.py +1 -1
- {skillanalyzer → skill_scanner}/utils/logging_config.py +7 -7
- cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +0 -100
- cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +0 -4
- skillanalyzer/data/yara_rules/autonomy_abuse.yara +0 -66
- skillanalyzer/data/yara_rules/code_execution.yara +0 -61
- skillanalyzer/data/yara_rules/command_injection.yara +0 -54
- skillanalyzer/data/yara_rules/script_injection.yara +0 -83
- skillanalyzer/data/yara_rules/system_manipulation.yara +0 -65
- skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +0 -60
- {cisco_ai_skill_scanner-1.0.0.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/WHEEL +0 -0
- {cisco_ai_skill_scanner-1.0.0.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {skillanalyzer → skill_scanner}/core/analyzers/base.py +0 -0
- {skillanalyzer → skill_scanner}/core/analyzers/behavioral/alignment/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/core/analyzers/behavioral/alignment/alignment_orchestrator.py +0 -0
- {skillanalyzer → skill_scanner}/core/analyzers/behavioral/alignment/alignment_response_validator.py +0 -0
- {skillanalyzer → skill_scanner}/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +0 -0
- {skillanalyzer → skill_scanner}/core/analyzers/llm_provider_config.py +0 -0
- {skillanalyzer → skill_scanner}/core/analyzers/llm_request_handler.py +0 -0
- {skillanalyzer → skill_scanner}/core/analyzers/llm_response_parser.py +0 -0
- {skillanalyzer → skill_scanner}/core/analyzers/virustotal_analyzer.py +0 -0
- {skillanalyzer → skill_scanner}/core/reporters/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/core/reporters/json_reporter.py +0 -0
- {skillanalyzer → skill_scanner}/core/rules/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/core/rules/patterns.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/cfg/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/cfg/builder.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/dataflow/forward_analysis.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/interprocedural/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/interprocedural/cross_file_analyzer.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/parser/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/semantic/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/semantic/name_resolver.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/semantic/type_analyzer.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/taint/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/taint/tracker.py +0 -0
- {skillanalyzer → skill_scanner}/core/static_analysis/types/__init__.py +0 -0
- {skillanalyzer → skill_scanner}/utils/file_utils.py +0 -0
- {skillanalyzer → skill_scanner}/utils/logging_utils.py +0 -0
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# SPDX-License-Identifier: Apache-2.0
|
|
16
16
|
|
|
17
17
|
"""
|
|
18
|
-
LLM Meta-Analyzer for
|
|
18
|
+
LLM Meta-Analyzer for Agent Skills Security Scanner.
|
|
19
19
|
|
|
20
20
|
Performs second-pass LLM analysis on findings from multiple analyzers to:
|
|
21
21
|
- Filter false positives based on contextual understanding
|
|
@@ -339,7 +339,7 @@ class MetaAnalyzer(BaseAnalyzer):
|
|
|
339
339
|
|
|
340
340
|
def _get_default_system_prompt(self) -> str:
|
|
341
341
|
"""Get default system prompt if file not found."""
|
|
342
|
-
return """You are a senior security analyst performing meta-analysis on
|
|
342
|
+
return """You are a senior security analyst performing meta-analysis on Agent Skill security findings.
|
|
343
343
|
Your role is to review findings from multiple analyzers, identify false positives,
|
|
344
344
|
prioritize by actual risk, correlate related issues, and provide actionable recommendations.
|
|
345
345
|
|
|
@@ -787,29 +787,42 @@ def apply_meta_analysis_to_results(
|
|
|
787
787
|
meta_result: MetaAnalysisResult,
|
|
788
788
|
skill: Skill,
|
|
789
789
|
) -> list[Finding]:
|
|
790
|
-
"""Apply meta-analysis results to
|
|
790
|
+
"""Apply meta-analysis results to enrich all findings with metadata.
|
|
791
791
|
|
|
792
792
|
This function:
|
|
793
|
-
1.
|
|
793
|
+
1. Marks false positives with metadata (but keeps them in output)
|
|
794
794
|
2. Adds meta-analysis enrichments to validated findings
|
|
795
795
|
3. Adds any new threats detected by meta-analyzer
|
|
796
796
|
|
|
797
|
+
All findings are retained in the output with metadata indicating whether
|
|
798
|
+
they were identified as false positives. This allows downstream consumers
|
|
799
|
+
(like VS Code extensions) to filter or display them as needed.
|
|
800
|
+
|
|
797
801
|
Args:
|
|
798
802
|
original_findings: Original findings from all analyzers
|
|
799
803
|
meta_result: Results from meta-analysis
|
|
800
804
|
skill: The skill being analyzed
|
|
801
805
|
|
|
802
806
|
Returns:
|
|
803
|
-
|
|
807
|
+
All findings with meta-analysis metadata added
|
|
804
808
|
"""
|
|
805
|
-
# Build
|
|
806
|
-
|
|
809
|
+
# Build false positive lookup with reasons and metadata
|
|
810
|
+
fp_data: dict[int, dict[str, Any]] = {}
|
|
807
811
|
for fp in meta_result.false_positives:
|
|
808
812
|
if "_index" in fp:
|
|
809
|
-
|
|
813
|
+
fp_data[fp["_index"]] = {
|
|
814
|
+
"reason": fp.get("reason") or fp.get("false_positive_reason") or "Identified as likely false positive",
|
|
815
|
+
"confidence": fp.get("confidence"),
|
|
816
|
+
}
|
|
810
817
|
|
|
811
818
|
# Build enrichment lookup from validated findings
|
|
812
|
-
enrichments = {}
|
|
819
|
+
enrichments: dict[int, dict[str, Any]] = {}
|
|
820
|
+
priority_lookup: dict[int, int] = {}
|
|
821
|
+
|
|
822
|
+
# Build priority rank lookup from priority_order
|
|
823
|
+
for rank, idx in enumerate(meta_result.priority_order, start=1):
|
|
824
|
+
priority_lookup[idx] = rank
|
|
825
|
+
|
|
813
826
|
for vf in meta_result.validated_findings:
|
|
814
827
|
idx = vf.get("_index")
|
|
815
828
|
if idx is not None:
|
|
@@ -821,25 +834,44 @@ def apply_meta_analysis_to_results(
|
|
|
821
834
|
"meta_impact": vf.get("impact"),
|
|
822
835
|
}
|
|
823
836
|
|
|
824
|
-
#
|
|
837
|
+
# Enrich all findings (do not filter out false positives)
|
|
825
838
|
result_findings = []
|
|
826
839
|
for i, finding in enumerate(original_findings):
|
|
827
|
-
#
|
|
828
|
-
if
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
#
|
|
832
|
-
if i in
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
840
|
+
# Ensure metadata dict exists
|
|
841
|
+
if finding.metadata is None:
|
|
842
|
+
finding.metadata = {}
|
|
843
|
+
|
|
844
|
+
# Mark false positives with metadata (but keep them in output)
|
|
845
|
+
if i in fp_data:
|
|
846
|
+
finding.metadata["meta_false_positive"] = True
|
|
847
|
+
finding.metadata["meta_reason"] = fp_data[i]["reason"]
|
|
848
|
+
if fp_data[i].get("confidence") is not None:
|
|
849
|
+
finding.metadata["meta_confidence"] = fp_data[i]["confidence"]
|
|
836
850
|
else:
|
|
837
|
-
|
|
851
|
+
# Mark as validated (not a false positive)
|
|
852
|
+
finding.metadata["meta_false_positive"] = False
|
|
853
|
+
|
|
854
|
+
# Add enrichments if available for validated findings
|
|
855
|
+
if i in enrichments:
|
|
856
|
+
for key, value in enrichments[i].items():
|
|
857
|
+
if value is not None:
|
|
858
|
+
finding.metadata[key] = value
|
|
859
|
+
else:
|
|
860
|
+
finding.metadata["meta_reviewed"] = True
|
|
861
|
+
|
|
862
|
+
# Add priority rank if available
|
|
863
|
+
if i in priority_lookup:
|
|
864
|
+
finding.metadata["meta_priority"] = priority_lookup[i]
|
|
838
865
|
|
|
839
866
|
result_findings.append(finding)
|
|
840
867
|
|
|
841
868
|
# Add missed threats as new findings
|
|
842
869
|
missed_findings = meta_result.get_missed_threats(skill)
|
|
870
|
+
for mf in missed_findings:
|
|
871
|
+
# Ensure missed threats are marked as validated (not false positives)
|
|
872
|
+
if mf.metadata is None:
|
|
873
|
+
mf.metadata = {}
|
|
874
|
+
mf.metadata["meta_false_positive"] = False
|
|
843
875
|
result_findings.extend(missed_findings)
|
|
844
876
|
|
|
845
877
|
return result_findings
|
|
@@ -24,6 +24,7 @@ import re
|
|
|
24
24
|
from pathlib import Path
|
|
25
25
|
from typing import Any
|
|
26
26
|
|
|
27
|
+
from ...config.yara_modes import DEFAULT_YARA_MODE, YaraModeConfig
|
|
27
28
|
from ...core.models import Finding, Severity, Skill, ThreatCategory
|
|
28
29
|
from ...core.rules.patterns import RuleLoader, SecurityRule
|
|
29
30
|
from ...core.rules.yara_scanner import YaraScanner
|
|
@@ -91,28 +92,93 @@ _RM_TARGET_PATTERN = re.compile(r"rm\s+-r[^;]*?\s+([^\s;]+)")
|
|
|
91
92
|
class StaticAnalyzer(BaseAnalyzer):
|
|
92
93
|
"""Static pattern-based security analyzer."""
|
|
93
94
|
|
|
94
|
-
def __init__(
|
|
95
|
+
def __init__(
|
|
96
|
+
self,
|
|
97
|
+
rules_file: Path | None = None,
|
|
98
|
+
use_yara: bool = True,
|
|
99
|
+
yara_mode: YaraModeConfig | str | None = None,
|
|
100
|
+
custom_yara_rules_path: str | Path | None = None,
|
|
101
|
+
disabled_rules: set[str] | None = None,
|
|
102
|
+
):
|
|
95
103
|
"""
|
|
96
104
|
Initialize static analyzer.
|
|
97
105
|
|
|
98
106
|
Args:
|
|
99
|
-
rules_file: Optional custom rules file
|
|
107
|
+
rules_file: Optional custom YAML rules file
|
|
100
108
|
use_yara: Whether to use YARA scanning (default: True)
|
|
109
|
+
yara_mode: YARA detection mode - can be:
|
|
110
|
+
- YaraModeConfig instance
|
|
111
|
+
- Mode name string: "strict", "balanced", "permissive"
|
|
112
|
+
- None for default (balanced)
|
|
113
|
+
custom_yara_rules_path: Path to directory containing custom YARA rules
|
|
114
|
+
(.yara files). If provided, uses these instead of built-in rules.
|
|
115
|
+
disabled_rules: Set of rule names to disable. Rules can be YARA rule
|
|
116
|
+
names (e.g., "YARA_script_injection") or static rule IDs
|
|
117
|
+
(e.g., "COMMAND_INJECTION_EVAL").
|
|
101
118
|
"""
|
|
102
119
|
super().__init__("static_analyzer")
|
|
103
120
|
|
|
104
121
|
self.rule_loader = RuleLoader(rules_file)
|
|
105
122
|
self.rule_loader.load_rules()
|
|
106
123
|
|
|
124
|
+
# Configure YARA mode
|
|
125
|
+
if yara_mode is None:
|
|
126
|
+
self.yara_mode = DEFAULT_YARA_MODE
|
|
127
|
+
elif isinstance(yara_mode, str):
|
|
128
|
+
self.yara_mode = YaraModeConfig.from_mode_name(yara_mode)
|
|
129
|
+
else:
|
|
130
|
+
self.yara_mode = yara_mode
|
|
131
|
+
|
|
132
|
+
# Store disabled rules (merge with mode-based disabled rules)
|
|
133
|
+
self.disabled_rules = set(disabled_rules or set())
|
|
134
|
+
self.disabled_rules.update(self.yara_mode.disabled_rules)
|
|
135
|
+
|
|
136
|
+
# Store custom YARA rules path
|
|
137
|
+
self.custom_yara_rules_path = Path(custom_yara_rules_path) if custom_yara_rules_path else None
|
|
138
|
+
|
|
107
139
|
self.use_yara = use_yara
|
|
108
140
|
self.yara_scanner = None
|
|
109
141
|
if use_yara:
|
|
110
142
|
try:
|
|
111
|
-
|
|
143
|
+
# Use custom rules path if provided
|
|
144
|
+
if self.custom_yara_rules_path:
|
|
145
|
+
self.yara_scanner = YaraScanner(rules_dir=self.custom_yara_rules_path)
|
|
146
|
+
logger.info("Using custom YARA rules from: %s", self.custom_yara_rules_path)
|
|
147
|
+
else:
|
|
148
|
+
self.yara_scanner = YaraScanner()
|
|
112
149
|
except Exception as e:
|
|
113
150
|
logger.warning("Could not load YARA scanner: %s", e)
|
|
114
151
|
self.yara_scanner = None
|
|
115
152
|
|
|
153
|
+
def _is_rule_enabled(self, rule_name: str) -> bool:
|
|
154
|
+
"""
|
|
155
|
+
Check if a rule is enabled.
|
|
156
|
+
|
|
157
|
+
A rule is enabled if:
|
|
158
|
+
1. It's enabled in the current YARA mode
|
|
159
|
+
2. It's not in the explicitly disabled rules set
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
rule_name: Name of the rule to check (e.g., "YARA_script_injection")
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
True if the rule is enabled, False otherwise
|
|
166
|
+
"""
|
|
167
|
+
# Check mode-based enable/disable first
|
|
168
|
+
if not self.yara_mode.is_rule_enabled(rule_name):
|
|
169
|
+
return False
|
|
170
|
+
|
|
171
|
+
# Check if explicitly disabled via --disable-rule
|
|
172
|
+
if rule_name in self.disabled_rules:
|
|
173
|
+
return False
|
|
174
|
+
|
|
175
|
+
# Also check without YARA_ prefix for convenience
|
|
176
|
+
base_name = rule_name.replace("YARA_", "") if rule_name.startswith("YARA_") else rule_name
|
|
177
|
+
if base_name in self.disabled_rules:
|
|
178
|
+
return False
|
|
179
|
+
|
|
180
|
+
return True
|
|
181
|
+
|
|
116
182
|
def analyze(self, skill: Skill) -> list[Finding]:
|
|
117
183
|
"""
|
|
118
184
|
Analyze skill using static pattern matching.
|
|
@@ -144,6 +210,10 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
144
210
|
|
|
145
211
|
findings.extend(self._scan_asset_files(skill))
|
|
146
212
|
|
|
213
|
+
# Filter out disabled rules
|
|
214
|
+
if self.disabled_rules:
|
|
215
|
+
findings = [f for f in findings if self._is_rule_enabled(f.rule_id)]
|
|
216
|
+
|
|
147
217
|
return findings
|
|
148
218
|
|
|
149
219
|
def _check_manifest(self, skill: Skill) -> list[Finding]:
|
|
@@ -157,10 +227,10 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
157
227
|
id=self._generate_finding_id("MANIFEST_INVALID_NAME", "manifest"),
|
|
158
228
|
rule_id="MANIFEST_INVALID_NAME",
|
|
159
229
|
category=ThreatCategory.POLICY_VIOLATION,
|
|
160
|
-
severity=Severity.
|
|
161
|
-
title="Skill name does not follow
|
|
230
|
+
severity=Severity.INFO,
|
|
231
|
+
title="Skill name does not follow agent skills naming rules",
|
|
162
232
|
description=(
|
|
163
|
-
f"Skill name '{manifest.name}' is invalid.
|
|
233
|
+
f"Skill name '{manifest.name}' is invalid. Agent skills require lowercase letters, numbers, "
|
|
164
234
|
f"and hyphens only, with a maximum length of 64 characters."
|
|
165
235
|
),
|
|
166
236
|
file_path="SKILL.md",
|
|
@@ -176,9 +246,9 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
176
246
|
rule_id="MANIFEST_DESCRIPTION_TOO_LONG",
|
|
177
247
|
category=ThreatCategory.POLICY_VIOLATION,
|
|
178
248
|
severity=Severity.LOW,
|
|
179
|
-
title="Skill description exceeds
|
|
249
|
+
title="Skill description exceeds agent skills length limit",
|
|
180
250
|
description=(
|
|
181
|
-
f"Skill description is {len(manifest.description)} characters;
|
|
251
|
+
f"Skill description is {len(manifest.description)} characters; Agent skills limit the "
|
|
182
252
|
f"`description` field to 1024 characters."
|
|
183
253
|
),
|
|
184
254
|
file_path="SKILL.md",
|
|
@@ -220,7 +290,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
220
290
|
title="Potential Anthropic brand impersonation",
|
|
221
291
|
description="Skill name or description contains 'Anthropic', suggesting official affiliation",
|
|
222
292
|
file_path="SKILL.md",
|
|
223
|
-
remediation="Do not impersonate official
|
|
293
|
+
remediation="Do not impersonate official skills or use unauthorized branding",
|
|
224
294
|
analyzer="static",
|
|
225
295
|
)
|
|
226
296
|
)
|
|
@@ -232,10 +302,10 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
232
302
|
rule_id="SOCIAL_ENG_ANTHROPIC_IMPERSONATION",
|
|
233
303
|
category=ThreatCategory.SOCIAL_ENGINEERING,
|
|
234
304
|
severity=Severity.HIGH,
|
|
235
|
-
title="Claims to be official
|
|
236
|
-
description="Skill claims to be an 'official'
|
|
305
|
+
title="Claims to be official skill",
|
|
306
|
+
description="Skill claims to be an 'official' skill",
|
|
237
307
|
file_path="SKILL.md",
|
|
238
|
-
remediation="Remove 'official' claims unless authorized
|
|
308
|
+
remediation="Remove 'official' claims unless properly authorized",
|
|
239
309
|
analyzer="static",
|
|
240
310
|
)
|
|
241
311
|
)
|
|
@@ -246,7 +316,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
246
316
|
id=self._generate_finding_id("MANIFEST_MISSING_LICENSE", "manifest"),
|
|
247
317
|
rule_id="MANIFEST_MISSING_LICENSE",
|
|
248
318
|
category=ThreatCategory.POLICY_VIOLATION,
|
|
249
|
-
severity=Severity.
|
|
319
|
+
severity=Severity.INFO,
|
|
250
320
|
title="Skill does not specify a license",
|
|
251
321
|
description="Skill manifest does not include a 'license' field. Specifying a license helps users understand usage terms.",
|
|
252
322
|
file_path="SKILL.md",
|
|
@@ -553,7 +623,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
553
623
|
]
|
|
554
624
|
|
|
555
625
|
socket_external_indicators = ["socket.connect", "socket.create_connection"]
|
|
556
|
-
socket_localhost_indicators = ["localhost", "127.0.0.1", "
|
|
626
|
+
socket_localhost_indicators = ["localhost", "127.0.0.1", "::1"]
|
|
557
627
|
|
|
558
628
|
for skill_file in skill.get_scripts():
|
|
559
629
|
content = skill_file.read_content()
|
|
@@ -622,8 +692,8 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
622
692
|
Finding(
|
|
623
693
|
id=self._generate_finding_id("ALLOWED_TOOLS_WRITE_VIOLATION", skill.name),
|
|
624
694
|
rule_id="ALLOWED_TOOLS_WRITE_VIOLATION",
|
|
625
|
-
category=ThreatCategory.
|
|
626
|
-
severity=Severity.
|
|
695
|
+
category=ThreatCategory.POLICY_VIOLATION,
|
|
696
|
+
severity=Severity.MEDIUM,
|
|
627
697
|
title="Skill declares no Write tool but bundled scripts write files",
|
|
628
698
|
description=(
|
|
629
699
|
f"Skill restricts tools to {skill.manifest.allowed_tools} but bundled scripts appear to "
|
|
@@ -651,22 +721,11 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
651
721
|
)
|
|
652
722
|
)
|
|
653
723
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
id=self._generate_finding_id("ALLOWED_TOOLS_PYTHON_VIOLATION", skill.name),
|
|
660
|
-
rule_id="ALLOWED_TOOLS_PYTHON_VIOLATION",
|
|
661
|
-
category=ThreatCategory.UNAUTHORIZED_TOOL_USE,
|
|
662
|
-
severity=Severity.HIGH,
|
|
663
|
-
title="Python scripts present but Python tool not in allowed-tools",
|
|
664
|
-
description=f"Skill restricts tools to {skill.manifest.allowed_tools} but includes Python scripts",
|
|
665
|
-
file_path=None,
|
|
666
|
-
remediation="Add 'Python' to allowed-tools or remove Python scripts",
|
|
667
|
-
analyzer="static",
|
|
668
|
-
)
|
|
669
|
-
)
|
|
724
|
+
# Note: ALLOWED_TOOLS_PYTHON_VIOLATION removed - too many false positives
|
|
725
|
+
# Many skills include Python helper scripts that are NOT invoked directly by the agent
|
|
726
|
+
# (e.g., build scripts, test files, utilities). The allowed-tools list controls what
|
|
727
|
+
# the AGENT can use, not what helper scripts exist in the repo.
|
|
728
|
+
# If direct Python execution is a concern, COMMAND_INJECTION_EVAL catches actual risks.
|
|
670
729
|
|
|
671
730
|
if "grep" not in allowed_tools_lower:
|
|
672
731
|
if self._code_uses_grep(skill):
|
|
@@ -927,6 +986,10 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
927
986
|
|
|
928
987
|
yara_matches = self.yara_scanner.scan_content(skill.instruction_body, "SKILL.md")
|
|
929
988
|
for match in yara_matches:
|
|
989
|
+
rule_name = match.get("rule_name", "")
|
|
990
|
+
# Check if rule is enabled in current mode and not explicitly disabled
|
|
991
|
+
if not self._is_rule_enabled(rule_name):
|
|
992
|
+
continue
|
|
930
993
|
findings.extend(self._create_findings_from_yara_match(match, skill))
|
|
931
994
|
|
|
932
995
|
for skill_file in skill.get_scripts():
|
|
@@ -935,7 +998,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
935
998
|
yara_matches = self.yara_scanner.scan_content(content, skill_file.relative_path)
|
|
936
999
|
for match in yara_matches:
|
|
937
1000
|
rule_name = match.get("rule_name", "")
|
|
938
|
-
if rule_name == "
|
|
1001
|
+
if rule_name == "capability_inflation_generic":
|
|
939
1002
|
continue
|
|
940
1003
|
findings.extend(self._create_findings_from_yara_match(match, skill, content))
|
|
941
1004
|
|
|
@@ -1006,8 +1069,28 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
1006
1069
|
".cache",
|
|
1007
1070
|
}
|
|
1008
1071
|
|
|
1072
|
+
PLACEHOLDER_MARKERS = {
|
|
1073
|
+
"your-",
|
|
1074
|
+
"your_",
|
|
1075
|
+
"your ",
|
|
1076
|
+
"example",
|
|
1077
|
+
"sample",
|
|
1078
|
+
"dummy",
|
|
1079
|
+
"placeholder",
|
|
1080
|
+
"replace",
|
|
1081
|
+
"changeme",
|
|
1082
|
+
"change_me",
|
|
1083
|
+
"<your",
|
|
1084
|
+
"<insert",
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1009
1087
|
for string_match in match["strings"]:
|
|
1010
|
-
|
|
1088
|
+
# Skip exclusion patterns (these are used in YARA conditions but shouldn't create findings)
|
|
1089
|
+
string_identifier = string_match.get("identifier", "")
|
|
1090
|
+
if string_identifier.startswith("$documentation") or string_identifier.startswith("$safe"):
|
|
1091
|
+
continue
|
|
1092
|
+
|
|
1093
|
+
if rule_name == "code_execution_generic":
|
|
1011
1094
|
line_content = string_match.get("line_content", "").lower()
|
|
1012
1095
|
matched_data = string_match.get("matched_data", "").lower()
|
|
1013
1096
|
|
|
@@ -1028,7 +1111,7 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
1028
1111
|
if is_safe_command:
|
|
1029
1112
|
continue
|
|
1030
1113
|
|
|
1031
|
-
if rule_name == "
|
|
1114
|
+
if rule_name == "system_manipulation_generic":
|
|
1032
1115
|
line_content = string_match.get("line_content", "").lower()
|
|
1033
1116
|
|
|
1034
1117
|
if "rm -rf" in line_content or "rm -r" in line_content:
|
|
@@ -1040,6 +1123,73 @@ class StaticAnalyzer(BaseAnalyzer):
|
|
|
1040
1123
|
if all_safe:
|
|
1041
1124
|
continue
|
|
1042
1125
|
|
|
1126
|
+
# Credential harvesting post-filters (controlled by mode)
|
|
1127
|
+
if rule_name == "credential_harvesting_generic":
|
|
1128
|
+
if self.yara_mode.credential_harvesting.filter_placeholder_patterns:
|
|
1129
|
+
line_content = string_match.get("line_content", "")
|
|
1130
|
+
matched_data = string_match.get("matched_data", "")
|
|
1131
|
+
combined = f"{line_content} {matched_data}".lower()
|
|
1132
|
+
|
|
1133
|
+
if any(marker in combined for marker in PLACEHOLDER_MARKERS):
|
|
1134
|
+
continue
|
|
1135
|
+
|
|
1136
|
+
if "export " in combined and "=" in combined:
|
|
1137
|
+
_, value = combined.split("=", 1)
|
|
1138
|
+
if any(marker in value for marker in PLACEHOLDER_MARKERS):
|
|
1139
|
+
continue
|
|
1140
|
+
|
|
1141
|
+
# Tool chaining post-filters (controlled by mode)
|
|
1142
|
+
if rule_name == "tool_chaining_abuse_generic":
|
|
1143
|
+
line_content = string_match.get("line_content", "")
|
|
1144
|
+
lower_line = line_content.lower()
|
|
1145
|
+
exfil_hints = ("send", "upload", "transmit", "webhook", "slack", "exfil", "forward")
|
|
1146
|
+
|
|
1147
|
+
if self.yara_mode.tool_chaining.filter_generic_http_verbs:
|
|
1148
|
+
if (
|
|
1149
|
+
"get" in lower_line
|
|
1150
|
+
and "post" in lower_line
|
|
1151
|
+
and not any(hint in lower_line for hint in exfil_hints)
|
|
1152
|
+
):
|
|
1153
|
+
continue
|
|
1154
|
+
|
|
1155
|
+
if self.yara_mode.tool_chaining.filter_api_documentation:
|
|
1156
|
+
if any(
|
|
1157
|
+
token in line_content for token in ("@app.", "app.", "router.", "route", "endpoint")
|
|
1158
|
+
) and not any(hint in lower_line for hint in exfil_hints):
|
|
1159
|
+
continue
|
|
1160
|
+
|
|
1161
|
+
if self.yara_mode.tool_chaining.filter_email_field_mentions:
|
|
1162
|
+
if "by email" in lower_line or "email address" in lower_line or "email field" in lower_line:
|
|
1163
|
+
continue
|
|
1164
|
+
|
|
1165
|
+
# Unicode steganography post-filters
|
|
1166
|
+
if rule_name == "prompt_injection_unicode_steganography":
|
|
1167
|
+
line_content = string_match.get("line_content", "")
|
|
1168
|
+
matched_data = string_match.get("matched_data", "")
|
|
1169
|
+
has_ascii_letters = any("A" <= char <= "Z" or "a" <= char <= "z" for char in line_content)
|
|
1170
|
+
|
|
1171
|
+
# Filter short matches in non-Latin context (likely legitimate i18n)
|
|
1172
|
+
if len(matched_data) <= 2 and not has_ascii_letters:
|
|
1173
|
+
continue
|
|
1174
|
+
|
|
1175
|
+
# Filter if context suggests legitimate internationalization
|
|
1176
|
+
i18n_markers = ("i18n", "locale", "translation", "lang=", "charset", "utf-8", "encoding")
|
|
1177
|
+
if any(marker in line_content.lower() for marker in i18n_markers):
|
|
1178
|
+
continue
|
|
1179
|
+
|
|
1180
|
+
# Filter Cyrillic, CJK, Arabic, Hebrew text (legitimate non-Latin content)
|
|
1181
|
+
# These are indicated by presence of those scripts without zero-width chars
|
|
1182
|
+
cyrillic_cjk_pattern = any(
|
|
1183
|
+
("\u0400" <= char <= "\u04ff") # Cyrillic
|
|
1184
|
+
or ("\u4e00" <= char <= "\u9fff") # CJK Unified
|
|
1185
|
+
or ("\u0600" <= char <= "\u06ff") # Arabic
|
|
1186
|
+
or ("\u0590" <= char <= "\u05ff") # Hebrew
|
|
1187
|
+
for char in line_content
|
|
1188
|
+
)
|
|
1189
|
+
# If the line has legitimate non-Latin text but matched only 1-2 zero-width chars, skip
|
|
1190
|
+
if cyrillic_cjk_pattern and len(matched_data) < 10:
|
|
1191
|
+
continue
|
|
1192
|
+
|
|
1043
1193
|
finding_id = self._generate_finding_id(f"YARA_{rule_name}", f"{file_path}:{string_match['line_number']}")
|
|
1044
1194
|
|
|
1045
1195
|
description = meta.get("description", f"YARA rule {rule_name} matched")
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"""
|
|
18
18
|
Trigger analyzer for detecting overly generic skill descriptions.
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
AI agents use skill descriptions to decide when to activate a skill.
|
|
21
21
|
Overly generic descriptions can cause trigger hijacking where a skill
|
|
22
22
|
activates for unrelated user requests.
|
|
23
23
|
"""
|
|
@@ -220,7 +220,7 @@ class TriggerAnalyzer(BaseAnalyzer):
|
|
|
220
220
|
title="Skill description is too short",
|
|
221
221
|
description=(
|
|
222
222
|
f"Description has only {len(words)} words. "
|
|
223
|
-
f"Short descriptions may not provide enough context for
|
|
223
|
+
f"Short descriptions may not provide enough context for the agent to determine "
|
|
224
224
|
f"when this skill should be used."
|
|
225
225
|
),
|
|
226
226
|
file_path="SKILL.md",
|
|
@@ -14,14 +14,14 @@
|
|
|
14
14
|
#
|
|
15
15
|
# SPDX-License-Identifier: Apache-2.0
|
|
16
16
|
|
|
17
|
-
"""Skill
|
|
17
|
+
"""Skill Scanner exceptions.
|
|
18
18
|
|
|
19
|
-
This module defines custom exceptions for Skill
|
|
20
|
-
All exceptions inherit from
|
|
19
|
+
This module defines custom exceptions for Skill Scanner operations.
|
|
20
|
+
All exceptions inherit from SkillScannerError for easy catching.
|
|
21
21
|
|
|
22
22
|
Example:
|
|
23
|
-
>>> from
|
|
24
|
-
>>> from
|
|
23
|
+
>>> from skill_scanner import Scanner
|
|
24
|
+
>>> from skill_scanner.core.exceptions import SkillLoadError
|
|
25
25
|
>>>
|
|
26
26
|
>>> scanner = Scanner()
|
|
27
27
|
>>>
|
|
@@ -34,13 +34,13 @@ Example:
|
|
|
34
34
|
"""
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
class
|
|
38
|
-
"""Base exception for all Skill
|
|
37
|
+
class SkillScannerError(Exception):
|
|
38
|
+
"""Base exception for all Skill Scanner errors."""
|
|
39
39
|
|
|
40
40
|
pass
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
class SkillLoadError(
|
|
43
|
+
class SkillLoadError(SkillScannerError):
|
|
44
44
|
"""Raised when unable to load a skill package.
|
|
45
45
|
|
|
46
46
|
This can indicate:
|
|
@@ -53,7 +53,7 @@ class SkillLoadError(SkillAnalyzerError):
|
|
|
53
53
|
pass
|
|
54
54
|
|
|
55
55
|
|
|
56
|
-
class SkillAnalysisError(
|
|
56
|
+
class SkillAnalysisError(SkillScannerError):
|
|
57
57
|
"""Raised when skill analysis fails.
|
|
58
58
|
|
|
59
59
|
This typically indicates:
|
|
@@ -65,7 +65,7 @@ class SkillAnalysisError(SkillAnalyzerError):
|
|
|
65
65
|
pass
|
|
66
66
|
|
|
67
67
|
|
|
68
|
-
class SkillValidationError(
|
|
68
|
+
class SkillValidationError(SkillScannerError):
|
|
69
69
|
"""Raised when skill validation fails.
|
|
70
70
|
|
|
71
71
|
This indicates:
|
|
@@ -33,10 +33,10 @@ class SkillLoadError(Exception):
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class SkillLoader:
|
|
36
|
-
"""Loads and parses
|
|
36
|
+
"""Loads and parses Agent Skill packages.
|
|
37
37
|
|
|
38
|
-
Supports the Agent Skills specification format used by
|
|
39
|
-
OpenAI Codex Skills
|
|
38
|
+
Supports the Agent Skills specification format used by
|
|
39
|
+
OpenAI Codex Skills and Cursor Agent Skills. Skills are structured as:
|
|
40
40
|
- SKILL.md (required): YAML frontmatter + Markdown instructions
|
|
41
41
|
- scripts/ (optional): Executable code (Python, Bash)
|
|
42
42
|
- references/ (optional): Documentation and data files
|
|
@@ -143,7 +143,7 @@ class SkillLoader:
|
|
|
143
143
|
# YAML has explicit metadata key (Codex Skills format)
|
|
144
144
|
metadata_field = metadata["metadata"]
|
|
145
145
|
else:
|
|
146
|
-
# Collect remaining fields as metadata (
|
|
146
|
+
# Collect remaining fields as metadata (Agent Skills format)
|
|
147
147
|
# Exclude known fields from being collected as metadata
|
|
148
148
|
known_fields = [
|
|
149
149
|
"name",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# SPDX-License-Identifier: Apache-2.0
|
|
16
16
|
|
|
17
17
|
"""
|
|
18
|
-
Data models for
|
|
18
|
+
Data models for agent skills and security findings.
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
from dataclasses import dataclass, field
|
|
@@ -49,6 +49,7 @@ class ThreatCategory(str, Enum):
|
|
|
49
49
|
RESOURCE_ABUSE = "resource_abuse"
|
|
50
50
|
POLICY_VIOLATION = "policy_violation"
|
|
51
51
|
MALWARE = "malware"
|
|
52
|
+
HARMFUL_CONTENT = "harmful_content"
|
|
52
53
|
# New threat categories
|
|
53
54
|
SKILL_DISCOVERY_ABUSE = "skill_discovery_abuse"
|
|
54
55
|
TRANSITIVE_TRUST_ABUSE = "transitive_trust_abuse"
|
|
@@ -61,7 +62,7 @@ class ThreatCategory(str, Enum):
|
|
|
61
62
|
class SkillManifest:
|
|
62
63
|
"""Parsed YAML frontmatter from SKILL.md.
|
|
63
64
|
|
|
64
|
-
Supports
|
|
65
|
+
Supports Codex Skills and Cursor Agent Skills formats,
|
|
65
66
|
which follow the Agent Skills specification. The format includes:
|
|
66
67
|
- Required: name, description
|
|
67
68
|
- Optional: license, compatibility, allowed-tools, metadata
|
|
@@ -82,7 +83,7 @@ class SkillManifest:
|
|
|
82
83
|
if self.allowed_tools is None:
|
|
83
84
|
self.allowed_tools = []
|
|
84
85
|
elif isinstance(self.allowed_tools, str):
|
|
85
|
-
#
|
|
86
|
+
# Agent skill docs commonly show comma-separated tool lists in YAML frontmatter
|
|
86
87
|
# (e.g., "allowed-tools: Read, Grep, Glob"). Treat this as a list.
|
|
87
88
|
parts = [p.strip() for p in self.allowed_tools.split(",")]
|
|
88
89
|
self.allowed_tools = [p for p in parts if p]
|
|
@@ -118,10 +119,10 @@ class SkillFile:
|
|
|
118
119
|
|
|
119
120
|
@dataclass
|
|
120
121
|
class Skill:
|
|
121
|
-
"""Represents a complete
|
|
122
|
+
"""Represents a complete Agent Skill package.
|
|
122
123
|
|
|
123
|
-
Supports the Agent Skills specification format used by
|
|
124
|
-
OpenAI Codex Skills
|
|
124
|
+
Supports the Agent Skills specification format used by
|
|
125
|
+
OpenAI Codex Skills and Cursor Agent Skills. The package structure includes:
|
|
125
126
|
- SKILL.md (required): Manifest and instructions
|
|
126
127
|
- scripts/ (optional): Executable code
|
|
127
128
|
- references/ (optional): Documentation files
|