cisco-ai-skill-scanner 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/METADATA +16 -1
  2. {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/RECORD +37 -35
  3. skill_scanner/_version.py +2 -2
  4. skill_scanner/api/api_cli.py +2 -2
  5. skill_scanner/api/api_server.py +1 -1
  6. skill_scanner/cli/cli.py +60 -2
  7. skill_scanner/config/yara_modes.py +314 -0
  8. skill_scanner/core/analyzers/llm_analyzer.py +3 -3
  9. skill_scanner/core/analyzers/meta_analyzer.py +50 -18
  10. skill_scanner/core/analyzers/static.py +177 -27
  11. skill_scanner/core/models.py +1 -0
  12. skill_scanner/core/reporters/markdown_reporter.py +9 -3
  13. skill_scanner/core/static_analysis/context_extractor.py +87 -13
  14. skill_scanner/data/prompts/code_alignment_threat_analysis_prompt.md +103 -28
  15. skill_scanner/data/prompts/llm_response_schema.json +3 -3
  16. skill_scanner/data/prompts/skill_meta_analysis_prompt.md +10 -9
  17. skill_scanner/data/prompts/skill_threat_analysis_prompt.md +42 -6
  18. skill_scanner/data/rules/signatures.yaml +141 -35
  19. skill_scanner/data/yara_rules/autonomy_abuse_generic.yara +66 -0
  20. skill_scanner/data/yara_rules/{skill_discovery_abuse.yara → capability_inflation_generic.yara} +7 -4
  21. skill_scanner/data/yara_rules/code_execution_generic.yara +76 -0
  22. skill_scanner/data/yara_rules/{coercive_injection.yara → coercive_injection_generic.yara} +2 -2
  23. skill_scanner/data/yara_rules/command_injection_generic.yara +77 -0
  24. skill_scanner/data/yara_rules/{credential_harvesting.yara → credential_harvesting_generic.yara} +25 -4
  25. skill_scanner/data/yara_rules/{transitive_trust_abuse.yara → indirect_prompt_injection_generic.yara} +8 -5
  26. skill_scanner/data/yara_rules/{prompt_injection.yara → prompt_injection_generic.yara} +2 -2
  27. skill_scanner/data/yara_rules/{unicode_steganography.yara → prompt_injection_unicode_steganography.yara} +23 -17
  28. skill_scanner/data/yara_rules/script_injection_generic.yara +82 -0
  29. skill_scanner/data/yara_rules/{sql_injection.yara → sql_injection_generic.yara} +22 -8
  30. skill_scanner/data/yara_rules/system_manipulation_generic.yara +79 -0
  31. skill_scanner/data/yara_rules/tool_chaining_abuse_generic.yara +72 -0
  32. skill_scanner/threats/__init__.py +24 -2
  33. skill_scanner/threats/cisco_ai_taxonomy.py +274 -0
  34. skill_scanner/threats/threats.py +28 -99
  35. skill_scanner/data/yara_rules/autonomy_abuse.yara +0 -66
  36. skill_scanner/data/yara_rules/code_execution.yara +0 -61
  37. skill_scanner/data/yara_rules/command_injection.yara +0 -54
  38. skill_scanner/data/yara_rules/script_injection.yara +0 -83
  39. skill_scanner/data/yara_rules/system_manipulation.yara +0 -65
  40. skill_scanner/data/yara_rules/tool_chaining_abuse.yara +0 -60
  41. {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/WHEEL +0 -0
  42. {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/entry_points.txt +0 -0
  43. {cisco_ai_skill_scanner-1.0.1.dist-info → cisco_ai_skill_scanner-1.0.2.dist-info}/licenses/LICENSE +0 -0
@@ -787,29 +787,42 @@ def apply_meta_analysis_to_results(
787
787
  meta_result: MetaAnalysisResult,
788
788
  skill: Skill,
789
789
  ) -> list[Finding]:
790
- """Apply meta-analysis results to filter and enrich findings.
790
+ """Apply meta-analysis results to enrich all findings with metadata.
791
791
 
792
792
  This function:
793
- 1. Filters out false positives identified by meta-analysis
793
+ 1. Marks false positives with metadata (but keeps them in output)
794
794
  2. Adds meta-analysis enrichments to validated findings
795
795
  3. Adds any new threats detected by meta-analyzer
796
796
 
797
+ All findings are retained in the output with metadata indicating whether
798
+ they were identified as false positives. This allows downstream consumers
799
+ (like VS Code extensions) to filter or display them as needed.
800
+
797
801
  Args:
798
802
  original_findings: Original findings from all analyzers
799
803
  meta_result: Results from meta-analysis
800
804
  skill: The skill being analyzed
801
805
 
802
806
  Returns:
803
- Filtered and enriched list of findings
807
+ All findings with meta-analysis metadata added
804
808
  """
805
- # Build set of false positive indices
806
- fp_indices = set()
809
+ # Build false positive lookup with reasons and metadata
810
+ fp_data: dict[int, dict[str, Any]] = {}
807
811
  for fp in meta_result.false_positives:
808
812
  if "_index" in fp:
809
- fp_indices.add(fp["_index"])
813
+ fp_data[fp["_index"]] = {
814
+ "reason": fp.get("reason") or fp.get("false_positive_reason") or "Identified as likely false positive",
815
+ "confidence": fp.get("confidence"),
816
+ }
810
817
 
811
818
  # Build enrichment lookup from validated findings
812
- enrichments = {}
819
+ enrichments: dict[int, dict[str, Any]] = {}
820
+ priority_lookup: dict[int, int] = {}
821
+
822
+ # Build priority rank lookup from priority_order
823
+ for rank, idx in enumerate(meta_result.priority_order, start=1):
824
+ priority_lookup[idx] = rank
825
+
813
826
  for vf in meta_result.validated_findings:
814
827
  idx = vf.get("_index")
815
828
  if idx is not None:
@@ -821,25 +834,44 @@ def apply_meta_analysis_to_results(
821
834
  "meta_impact": vf.get("impact"),
822
835
  }
823
836
 
824
- # Filter and enrich original findings
837
+ # Enrich all findings (do not filter out false positives)
825
838
  result_findings = []
826
839
  for i, finding in enumerate(original_findings):
827
- # Skip false positives
828
- if i in fp_indices:
829
- continue
830
-
831
- # Add enrichments if available
832
- if i in enrichments:
833
- for key, value in enrichments[i].items():
834
- if value is not None:
835
- finding.metadata[key] = value
840
+ # Ensure metadata dict exists
841
+ if finding.metadata is None:
842
+ finding.metadata = {}
843
+
844
+ # Mark false positives with metadata (but keep them in output)
845
+ if i in fp_data:
846
+ finding.metadata["meta_false_positive"] = True
847
+ finding.metadata["meta_reason"] = fp_data[i]["reason"]
848
+ if fp_data[i].get("confidence") is not None:
849
+ finding.metadata["meta_confidence"] = fp_data[i]["confidence"]
836
850
  else:
837
- finding.metadata["meta_reviewed"] = True
851
+ # Mark as validated (not a false positive)
852
+ finding.metadata["meta_false_positive"] = False
853
+
854
+ # Add enrichments if available for validated findings
855
+ if i in enrichments:
856
+ for key, value in enrichments[i].items():
857
+ if value is not None:
858
+ finding.metadata[key] = value
859
+ else:
860
+ finding.metadata["meta_reviewed"] = True
861
+
862
+ # Add priority rank if available
863
+ if i in priority_lookup:
864
+ finding.metadata["meta_priority"] = priority_lookup[i]
838
865
 
839
866
  result_findings.append(finding)
840
867
 
841
868
  # Add missed threats as new findings
842
869
  missed_findings = meta_result.get_missed_threats(skill)
870
+ for mf in missed_findings:
871
+ # Ensure missed threats are marked as validated (not false positives)
872
+ if mf.metadata is None:
873
+ mf.metadata = {}
874
+ mf.metadata["meta_false_positive"] = False
843
875
  result_findings.extend(missed_findings)
844
876
 
845
877
  return result_findings
@@ -24,6 +24,7 @@ import re
24
24
  from pathlib import Path
25
25
  from typing import Any
26
26
 
27
+ from ...config.yara_modes import DEFAULT_YARA_MODE, YaraModeConfig
27
28
  from ...core.models import Finding, Severity, Skill, ThreatCategory
28
29
  from ...core.rules.patterns import RuleLoader, SecurityRule
29
30
  from ...core.rules.yara_scanner import YaraScanner
@@ -91,28 +92,93 @@ _RM_TARGET_PATTERN = re.compile(r"rm\s+-r[^;]*?\s+([^\s;]+)")
91
92
  class StaticAnalyzer(BaseAnalyzer):
92
93
  """Static pattern-based security analyzer."""
93
94
 
94
- def __init__(self, rules_file: Path | None = None, use_yara: bool = True):
95
+ def __init__(
96
+ self,
97
+ rules_file: Path | None = None,
98
+ use_yara: bool = True,
99
+ yara_mode: YaraModeConfig | str | None = None,
100
+ custom_yara_rules_path: str | Path | None = None,
101
+ disabled_rules: set[str] | None = None,
102
+ ):
95
103
  """
96
104
  Initialize static analyzer.
97
105
 
98
106
  Args:
99
- rules_file: Optional custom rules file
107
+ rules_file: Optional custom YAML rules file
100
108
  use_yara: Whether to use YARA scanning (default: True)
109
+ yara_mode: YARA detection mode - can be:
110
+ - YaraModeConfig instance
111
+ - Mode name string: "strict", "balanced", "permissive"
112
+ - None for default (balanced)
113
+ custom_yara_rules_path: Path to directory containing custom YARA rules
114
+ (.yara files). If provided, uses these instead of built-in rules.
115
+ disabled_rules: Set of rule names to disable. Rules can be YARA rule
116
+ names (e.g., "YARA_script_injection") or static rule IDs
117
+ (e.g., "COMMAND_INJECTION_EVAL").
101
118
  """
102
119
  super().__init__("static_analyzer")
103
120
 
104
121
  self.rule_loader = RuleLoader(rules_file)
105
122
  self.rule_loader.load_rules()
106
123
 
124
+ # Configure YARA mode
125
+ if yara_mode is None:
126
+ self.yara_mode = DEFAULT_YARA_MODE
127
+ elif isinstance(yara_mode, str):
128
+ self.yara_mode = YaraModeConfig.from_mode_name(yara_mode)
129
+ else:
130
+ self.yara_mode = yara_mode
131
+
132
+ # Store disabled rules (merge with mode-based disabled rules)
133
+ self.disabled_rules = set(disabled_rules or set())
134
+ self.disabled_rules.update(self.yara_mode.disabled_rules)
135
+
136
+ # Store custom YARA rules path
137
+ self.custom_yara_rules_path = Path(custom_yara_rules_path) if custom_yara_rules_path else None
138
+
107
139
  self.use_yara = use_yara
108
140
  self.yara_scanner = None
109
141
  if use_yara:
110
142
  try:
111
- self.yara_scanner = YaraScanner()
143
+ # Use custom rules path if provided
144
+ if self.custom_yara_rules_path:
145
+ self.yara_scanner = YaraScanner(rules_dir=self.custom_yara_rules_path)
146
+ logger.info("Using custom YARA rules from: %s", self.custom_yara_rules_path)
147
+ else:
148
+ self.yara_scanner = YaraScanner()
112
149
  except Exception as e:
113
150
  logger.warning("Could not load YARA scanner: %s", e)
114
151
  self.yara_scanner = None
115
152
 
153
+ def _is_rule_enabled(self, rule_name: str) -> bool:
154
+ """
155
+ Check if a rule is enabled.
156
+
157
+ A rule is enabled if:
158
+ 1. It's enabled in the current YARA mode
159
+ 2. It's not in the explicitly disabled rules set
160
+
161
+ Args:
162
+ rule_name: Name of the rule to check (e.g., "YARA_script_injection")
163
+
164
+ Returns:
165
+ True if the rule is enabled, False otherwise
166
+ """
167
+ # Check mode-based enable/disable first
168
+ if not self.yara_mode.is_rule_enabled(rule_name):
169
+ return False
170
+
171
+ # Check if explicitly disabled via --disable-rule
172
+ if rule_name in self.disabled_rules:
173
+ return False
174
+
175
+ # Also check without YARA_ prefix for convenience
176
+ base_name = rule_name.replace("YARA_", "") if rule_name.startswith("YARA_") else rule_name
177
+ if base_name in self.disabled_rules:
178
+ return False
179
+
180
+ return True
181
+
116
182
  def analyze(self, skill: Skill) -> list[Finding]:
117
183
  """
118
184
  Analyze skill using static pattern matching.
@@ -144,6 +210,10 @@ class StaticAnalyzer(BaseAnalyzer):
144
210
 
145
211
  findings.extend(self._scan_asset_files(skill))
146
212
 
213
+ # Filter out disabled rules
214
+ if self.disabled_rules:
215
+ findings = [f for f in findings if self._is_rule_enabled(f.rule_id)]
216
+
147
217
  return findings
148
218
 
149
219
  def _check_manifest(self, skill: Skill) -> list[Finding]:
@@ -157,7 +227,7 @@ class StaticAnalyzer(BaseAnalyzer):
157
227
  id=self._generate_finding_id("MANIFEST_INVALID_NAME", "manifest"),
158
228
  rule_id="MANIFEST_INVALID_NAME",
159
229
  category=ThreatCategory.POLICY_VIOLATION,
160
- severity=Severity.LOW,
230
+ severity=Severity.INFO,
161
231
  title="Skill name does not follow agent skills naming rules",
162
232
  description=(
163
233
  f"Skill name '{manifest.name}' is invalid. Agent skills require lowercase letters, numbers, "
@@ -246,7 +316,7 @@ class StaticAnalyzer(BaseAnalyzer):
246
316
  id=self._generate_finding_id("MANIFEST_MISSING_LICENSE", "manifest"),
247
317
  rule_id="MANIFEST_MISSING_LICENSE",
248
318
  category=ThreatCategory.POLICY_VIOLATION,
249
- severity=Severity.LOW,
319
+ severity=Severity.INFO,
250
320
  title="Skill does not specify a license",
251
321
  description="Skill manifest does not include a 'license' field. Specifying a license helps users understand usage terms.",
252
322
  file_path="SKILL.md",
@@ -553,7 +623,7 @@ class StaticAnalyzer(BaseAnalyzer):
553
623
  ]
554
624
 
555
625
  socket_external_indicators = ["socket.connect", "socket.create_connection"]
556
- socket_localhost_indicators = ["localhost", "127.0.0.1", "0.0.0.0"]
626
+ socket_localhost_indicators = ["localhost", "127.0.0.1", "::1"]
557
627
 
558
628
  for skill_file in skill.get_scripts():
559
629
  content = skill_file.read_content()
@@ -622,8 +692,8 @@ class StaticAnalyzer(BaseAnalyzer):
622
692
  Finding(
623
693
  id=self._generate_finding_id("ALLOWED_TOOLS_WRITE_VIOLATION", skill.name),
624
694
  rule_id="ALLOWED_TOOLS_WRITE_VIOLATION",
625
- category=ThreatCategory.UNAUTHORIZED_TOOL_USE,
626
- severity=Severity.HIGH,
695
+ category=ThreatCategory.POLICY_VIOLATION,
696
+ severity=Severity.MEDIUM,
627
697
  title="Skill declares no Write tool but bundled scripts write files",
628
698
  description=(
629
699
  f"Skill restricts tools to {skill.manifest.allowed_tools} but bundled scripts appear to "
@@ -651,22 +721,11 @@ class StaticAnalyzer(BaseAnalyzer):
651
721
  )
652
722
  )
653
723
 
654
- if "python" not in allowed_tools_lower:
655
- python_scripts = [f for f in skill.files if f.file_type == "python" and f.relative_path != "SKILL.md"]
656
- if python_scripts:
657
- findings.append(
658
- Finding(
659
- id=self._generate_finding_id("ALLOWED_TOOLS_PYTHON_VIOLATION", skill.name),
660
- rule_id="ALLOWED_TOOLS_PYTHON_VIOLATION",
661
- category=ThreatCategory.UNAUTHORIZED_TOOL_USE,
662
- severity=Severity.HIGH,
663
- title="Python scripts present but Python tool not in allowed-tools",
664
- description=f"Skill restricts tools to {skill.manifest.allowed_tools} but includes Python scripts",
665
- file_path=None,
666
- remediation="Add 'Python' to allowed-tools or remove Python scripts",
667
- analyzer="static",
668
- )
669
- )
724
+ # Note: ALLOWED_TOOLS_PYTHON_VIOLATION removed - too many false positives
725
+ # Many skills include Python helper scripts that are NOT invoked directly by the agent
726
+ # (e.g., build scripts, test files, utilities). The allowed-tools list controls what
727
+ # the AGENT can use, not what helper scripts exist in the repo.
728
+ # If direct Python execution is a concern, COMMAND_INJECTION_EVAL catches actual risks.
670
729
 
671
730
  if "grep" not in allowed_tools_lower:
672
731
  if self._code_uses_grep(skill):
@@ -927,6 +986,10 @@ class StaticAnalyzer(BaseAnalyzer):
927
986
 
928
987
  yara_matches = self.yara_scanner.scan_content(skill.instruction_body, "SKILL.md")
929
988
  for match in yara_matches:
989
+ rule_name = match.get("rule_name", "")
990
+ # Check if rule is enabled in current mode and not explicitly disabled
991
+ if not self._is_rule_enabled(rule_name):
992
+ continue
930
993
  findings.extend(self._create_findings_from_yara_match(match, skill))
931
994
 
932
995
  for skill_file in skill.get_scripts():
@@ -935,7 +998,7 @@ class StaticAnalyzer(BaseAnalyzer):
935
998
  yara_matches = self.yara_scanner.scan_content(content, skill_file.relative_path)
936
999
  for match in yara_matches:
937
1000
  rule_name = match.get("rule_name", "")
938
- if rule_name == "skill_discovery_abuse":
1001
+ if rule_name == "capability_inflation_generic":
939
1002
  continue
940
1003
  findings.extend(self._create_findings_from_yara_match(match, skill, content))
941
1004
 
@@ -1006,8 +1069,28 @@ class StaticAnalyzer(BaseAnalyzer):
1006
1069
  ".cache",
1007
1070
  }
1008
1071
 
1072
+ PLACEHOLDER_MARKERS = {
1073
+ "your-",
1074
+ "your_",
1075
+ "your ",
1076
+ "example",
1077
+ "sample",
1078
+ "dummy",
1079
+ "placeholder",
1080
+ "replace",
1081
+ "changeme",
1082
+ "change_me",
1083
+ "<your",
1084
+ "<insert",
1085
+ }
1086
+
1009
1087
  for string_match in match["strings"]:
1010
- if rule_name == "code_execution":
1088
+ # Skip exclusion patterns (these are used in YARA conditions but shouldn't create findings)
1089
+ string_identifier = string_match.get("identifier", "")
1090
+ if string_identifier.startswith("$documentation") or string_identifier.startswith("$safe"):
1091
+ continue
1092
+
1093
+ if rule_name == "code_execution_generic":
1011
1094
  line_content = string_match.get("line_content", "").lower()
1012
1095
  matched_data = string_match.get("matched_data", "").lower()
1013
1096
 
@@ -1028,7 +1111,7 @@ class StaticAnalyzer(BaseAnalyzer):
1028
1111
  if is_safe_command:
1029
1112
  continue
1030
1113
 
1031
- if rule_name == "system_manipulation":
1114
+ if rule_name == "system_manipulation_generic":
1032
1115
  line_content = string_match.get("line_content", "").lower()
1033
1116
 
1034
1117
  if "rm -rf" in line_content or "rm -r" in line_content:
@@ -1040,6 +1123,73 @@ class StaticAnalyzer(BaseAnalyzer):
1040
1123
  if all_safe:
1041
1124
  continue
1042
1125
 
1126
+ # Credential harvesting post-filters (controlled by mode)
1127
+ if rule_name == "credential_harvesting_generic":
1128
+ if self.yara_mode.credential_harvesting.filter_placeholder_patterns:
1129
+ line_content = string_match.get("line_content", "")
1130
+ matched_data = string_match.get("matched_data", "")
1131
+ combined = f"{line_content} {matched_data}".lower()
1132
+
1133
+ if any(marker in combined for marker in PLACEHOLDER_MARKERS):
1134
+ continue
1135
+
1136
+ if "export " in combined and "=" in combined:
1137
+ _, value = combined.split("=", 1)
1138
+ if any(marker in value for marker in PLACEHOLDER_MARKERS):
1139
+ continue
1140
+
1141
+ # Tool chaining post-filters (controlled by mode)
1142
+ if rule_name == "tool_chaining_abuse_generic":
1143
+ line_content = string_match.get("line_content", "")
1144
+ lower_line = line_content.lower()
1145
+ exfil_hints = ("send", "upload", "transmit", "webhook", "slack", "exfil", "forward")
1146
+
1147
+ if self.yara_mode.tool_chaining.filter_generic_http_verbs:
1148
+ if (
1149
+ "get" in lower_line
1150
+ and "post" in lower_line
1151
+ and not any(hint in lower_line for hint in exfil_hints)
1152
+ ):
1153
+ continue
1154
+
1155
+ if self.yara_mode.tool_chaining.filter_api_documentation:
1156
+ if any(
1157
+ token in line_content for token in ("@app.", "app.", "router.", "route", "endpoint")
1158
+ ) and not any(hint in lower_line for hint in exfil_hints):
1159
+ continue
1160
+
1161
+ if self.yara_mode.tool_chaining.filter_email_field_mentions:
1162
+ if "by email" in lower_line or "email address" in lower_line or "email field" in lower_line:
1163
+ continue
1164
+
1165
+ # Unicode steganography post-filters
1166
+ if rule_name == "prompt_injection_unicode_steganography":
1167
+ line_content = string_match.get("line_content", "")
1168
+ matched_data = string_match.get("matched_data", "")
1169
+ has_ascii_letters = any("A" <= char <= "Z" or "a" <= char <= "z" for char in line_content)
1170
+
1171
+ # Filter short matches in non-Latin context (likely legitimate i18n)
1172
+ if len(matched_data) <= 2 and not has_ascii_letters:
1173
+ continue
1174
+
1175
+ # Filter if context suggests legitimate internationalization
1176
+ i18n_markers = ("i18n", "locale", "translation", "lang=", "charset", "utf-8", "encoding")
1177
+ if any(marker in line_content.lower() for marker in i18n_markers):
1178
+ continue
1179
+
1180
+ # Filter Cyrillic, CJK, Arabic, Hebrew text (legitimate non-Latin content)
1181
+ # These are indicated by presence of those scripts without zero-width chars
1182
+ cyrillic_cjk_pattern = any(
1183
+ ("\u0400" <= char <= "\u04ff") # Cyrillic
1184
+ or ("\u4e00" <= char <= "\u9fff") # CJK Unified
1185
+ or ("\u0600" <= char <= "\u06ff") # Arabic
1186
+ or ("\u0590" <= char <= "\u05ff") # Hebrew
1187
+ for char in line_content
1188
+ )
1189
+ # If the line has legitimate non-Latin text but matched only 1-2 zero-width chars, skip
1190
+ if cyrillic_cjk_pattern and len(matched_data) < 10:
1191
+ continue
1192
+
1043
1193
  finding_id = self._generate_finding_id(f"YARA_{rule_name}", f"{file_path}:{string_match['line_number']}")
1044
1194
 
1045
1195
  description = meta.get("description", f"YARA rule {rule_name} matched")
@@ -49,6 +49,7 @@ class ThreatCategory(str, Enum):
49
49
  RESOURCE_ABUSE = "resource_abuse"
50
50
  POLICY_VIOLATION = "policy_violation"
51
51
  MALWARE = "malware"
52
+ HARMFUL_CONTENT = "harmful_content"
52
53
  # New threat categories
53
54
  SKILL_DISCOVERY_ABUSE = "skill_discovery_abuse"
54
55
  TRANSITIVE_TRUST_ABUSE = "transitive_trust_abuse"
@@ -18,6 +18,8 @@
18
18
  Markdown format reporter for scan results.
19
19
  """
20
20
 
21
+ import re
22
+
21
23
  from ...core.models import Finding, Report, ScanResult, Severity
22
24
 
23
25
 
@@ -137,6 +139,7 @@ class MarkdownReporter:
137
139
  lines.append("")
138
140
 
139
141
  for result in report.scan_results:
142
+ lines.append("\n---\n")
140
143
  status_icon = "[OK]" if result.is_safe else "[FAIL]"
141
144
  lines.append(f"### {status_icon} {result.skill_name}")
142
145
  lines.append("")
@@ -186,9 +189,12 @@ class MarkdownReporter:
186
189
  if finding.snippet:
187
190
  lines.append(f"{indent_str}")
188
191
  lines.append(f"{indent_str}**Code Snippet:**")
189
- lines.append(f"{indent_str}```")
190
- lines.append(f"{indent_str}{finding.snippet}")
191
- lines.append(f"{indent_str}```")
192
+ if not re.search(r"```", finding.snippet):
193
+ lines.append(f"{indent_str}```")
194
+ for line in finding.snippet.splitlines():
195
+ lines.append(f"{indent_str}{line}")
196
+ if not re.search(r"```", finding.snippet):
197
+ lines.append(f"{indent_str}```")
192
198
 
193
199
  if finding.remediation:
194
200
  lines.append(f"{indent_str}")
@@ -141,18 +141,68 @@ class SkillFunctionContext:
141
141
  class ContextExtractor:
142
142
  """Extract comprehensive security context from skill scripts."""
143
143
 
144
- SUSPICIOUS_DOMAINS = ["attacker.com", "evil.com", "malicious.com", "pastebin.com"]
144
+ # ONLY flag URLs to explicitly suspicious domains - not all unknown URLs
145
+ # Reference: https://lots-project.com/ (Living Off Trusted Sites)
146
+ SUSPICIOUS_DOMAINS = [
147
+ # Known exfil/C2/paste services (LOTS: Download, Exfiltration, C&C)
148
+ "pastebin.com",
149
+ "hastebin.com",
150
+ "paste.ee",
151
+ "rentry.co",
152
+ "zerobin.net",
153
+ "textbin.net",
154
+ "termbin.com",
155
+ "sprunge.us",
156
+ "clbin.com",
157
+ "ix.io",
158
+ "pastetext.net",
159
+ "pastie.org",
160
+ "ideone.com",
161
+ # File sharing services (LOTS: Download, Exfiltration)
162
+ "transfer.sh",
163
+ "filebin.net",
164
+ "gofile.io",
165
+ "anonfiles.com",
166
+ "mediafire.com",
167
+ "mega.nz",
168
+ "wetransfer.com",
169
+ "filetransfer.io",
170
+ "ufile.io",
171
+ "4sync.com",
172
+ "uplooder.net",
173
+ "filecloudonline.com",
174
+ "sendspace.com",
175
+ "siasky.net",
176
+ # Tunneling/webhook services (LOTS: C&C, Exfiltration)
177
+ "webhook.site",
178
+ "requestbin",
179
+ "ngrok.io",
180
+ "pipedream.net",
181
+ "localhost.run",
182
+ "trycloudflare.com",
183
+ # Code execution services (LOTS: C&C, Download)
184
+ "codepen.io",
185
+ "repl.co",
186
+ "glitch.me",
187
+ # Explicitly malicious example domains
188
+ "attacker.example.com",
189
+ "evil.example.com",
190
+ "malicious.com",
191
+ "c2-server.com",
192
+ ]
145
193
 
146
- # Legitimate domains that should NOT be flagged as suspicious
194
+ # Domains that are always safe (not flagged even if matched by SUSPICIOUS_DOMAINS pattern)
195
+ # NOTE: We intentionally exclude file-hosting/messaging services that appear in LOTS
196
+ # (https://lots-project.com/) with Download/C&C capabilities, even if commonly used.
147
197
  LEGITIMATE_DOMAINS = [
148
- # AI provider services
198
+ # AI provider services (API endpoints only, not user content)
149
199
  "api.anthropic.com",
150
200
  "statsig.anthropic.com",
151
- # Code repositories
152
- "github.com",
153
- "gitlab.com",
154
- "bitbucket.org",
155
- # Package registries
201
+ "api.openai.com",
202
+ "api.together.xyz",
203
+ "api.cohere.ai",
204
+ "generativelanguage.googleapis.com",
205
+ # Package registries (read-only, no user-uploaded executables)
156
206
  "registry.npmjs.org",
157
207
  "npmjs.com",
158
208
  "npmjs.org",
@@ -161,18 +211,43 @@ class ContextExtractor:
161
211
  "pypi.org",
162
212
  "files.pythonhosted.org",
163
213
  "pythonhosted.org",
214
+ "crates.io",
215
+ "rubygems.org",
216
+ "pkg.go.dev",
164
217
  # System packages
165
218
  "archive.ubuntu.com",
166
219
  "security.ubuntu.com",
220
+ "debian.org",
167
221
  # XML schemas (for OOXML document processing)
168
222
  "schemas.microsoft.com",
169
223
  "schemas.openxmlformats.org",
170
224
  "www.w3.org",
171
225
  "purl.org",
226
+ "json-schema.org",
172
227
  # Localhost and development
173
228
  "localhost",
174
229
  "127.0.0.1",
175
230
  "0.0.0.0",
231
+ "::1",
232
+ # Common safe services (API-focused, not file hosting)
233
+ "stripe.com",
234
+ "zoom.us",
235
+ "twilio.com",
236
+ "mailgun.com",
237
+ "sentry.io",
238
+ "datadog.com",
239
+ "newrelic.com",
240
+ "elastic.co",
241
+ "mongodb.com",
242
+ "redis.io",
243
+ "postgresql.org",
244
+ # NOTE: The following are intentionally NOT in this list due to LOTS risk:
245
+ # - github.com, gitlab.com, bitbucket.org (Download, C&C)
246
+ # - raw.githubusercontent.com (Download, C&C)
247
+ # - discord.com, telegram.org, slack.com (C&C, Exfil)
248
+ # - amazonaws.com, googleapis.com, azure.com, cloudflare.com (wildcard hosting)
249
+ # - google.com, microsoft.com (too broad, includes file hosting)
250
+ # - sendgrid.com (email tracking/download)
176
251
  ]
177
252
 
178
253
  def extract_context(self, file_path: Path, source_code: str) -> SkillScriptContext:
@@ -259,7 +334,8 @@ class ContextExtractor:
259
334
  # Also collect module-level strings (class attributes, etc.)
260
335
  all_strings.extend(parser.module_strings)
261
336
 
262
- # Find suspicious URLs (filter out legitimate domains and docstrings)
337
+ # Find suspicious URLs - ONLY flag URLs to known-bad destinations
338
+ # Don't flag unknown URLs - that creates too many false positives
263
339
  suspicious_urls = []
264
340
  for s in all_strings:
265
341
  # Skip if not URL-like or contains newlines (docstrings)
@@ -271,12 +347,10 @@ class ContextExtractor:
271
347
  # Skip if contains legitimate domain
272
348
  if any(domain in s for domain in self.LEGITIMATE_DOMAINS):
273
349
  continue
274
- # Flag if contains known suspicious domain OR is generic http URL
350
+ # ONLY flag if URL contains a known suspicious domain
351
+ # Don't flag all unknown URLs - that's too aggressive
275
352
  if any(domain in s for domain in self.SUSPICIOUS_DOMAINS):
276
353
  suspicious_urls.append(s)
277
- # Generic URLs only if they look suspicious (not just schema URLs)
278
- elif not any(schema in s for schema in ["schemas.", "www.w3.org", "xmlns"]):
279
- suspicious_urls.append(s)
280
354
 
281
355
  # Create context
282
356
  context = SkillScriptContext(