cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,195 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Table format reporter for scan results.
19
+ """
20
+
21
+ from tabulate import tabulate
22
+
23
+ from ...core.models import Report, ScanResult, Severity
24
+
25
+
26
class TableReporter:
    """Generates table format reports.

    Renders either a single ``ScanResult`` or a multi-skill ``Report`` as
    plain text built from ``tabulate`` tables, optionally followed by a
    per-finding "CODE EVIDENCE" section.
    """

    def __init__(self, format_style: str = "grid", show_snippets: bool = False):
        """
        Initialize table reporter.

        Args:
            format_style: Table format passed to ``tabulate`` as ``tablefmt``
                (grid, simple, plain, etc.)
            show_snippets: If True, include code snippets after table
        """
        self.format_style = format_style
        self.show_snippets = show_snippets

    def generate_report(self, data: ScanResult | Report) -> str:
        """
        Generate table report.

        Args:
            data: ScanResult or Report object

        Returns:
            Table string
        """
        if isinstance(data, ScanResult):
            return self._generate_scan_result_report(data)
        return self._generate_multi_skill_report(data)

    @staticmethod
    def _format_location(finding) -> str:
        """Return ``path[:line]`` for a finding, or ``N/A`` when it has no file path."""
        location = finding.file_path or "N/A"
        if finding.line_number:
            location += f":{finding.line_number}"
        return location

    @staticmethod
    def _truncate(text: str, limit: int) -> str:
        """Cut ``text`` to ``limit`` characters, appending ``...`` when shortened."""
        return text[:limit] + "..." if len(text) > limit else text

    def _generate_scan_result_report(self, result: ScanResult) -> str:
        """Generate table report for a single skill scan."""
        banner = "=" * 80
        lines = [
            banner,
            f"Claude Skill Security Scan: {result.skill_name}",
            banner,
            "",
        ]

        # Summary table
        summary_data = [
            ["Skill", result.skill_name],
            ["Status", "[OK] SAFE" if result.is_safe else "[FAIL] ISSUES FOUND"],
            ["Max Severity", result.max_severity.value],
            ["Total Findings", len(result.findings)],
            ["Scan Duration", f"{result.scan_duration_seconds:.2f}s"],
        ]
        lines.append(tabulate(summary_data, tablefmt=self.format_style))
        lines.append("")

        if not result.findings:
            lines.append("[OK] No security issues found!")
            lines.append("")
            return "\n".join(lines)

        # Findings by severity
        severity_levels = (
            ("Critical", Severity.CRITICAL),
            ("High", Severity.HIGH),
            ("Medium", Severity.MEDIUM),
            ("Low", Severity.LOW),
            ("Info", Severity.INFO),
        )
        severity_data = [
            [label, len(result.get_findings_by_severity(level))]
            for label, level in severity_levels
        ]
        lines.append("Findings by Severity:")
        lines.append(tabulate(severity_data, headers=["Severity", "Count"], tablefmt=self.format_style))
        lines.append("")

        # Detailed findings table; long titles/locations are shortened so the
        # table stays readable.
        lines.append("Detailed Findings:")
        findings_data = [
            [
                finding.severity.value,
                finding.category.value,
                self._truncate(finding.title, 40),
                self._truncate(self._format_location(finding), 30),
            ]
            for finding in result.findings
        ]
        lines.append(
            tabulate(
                findings_data,
                headers=["Severity", "Category", "Title", "Location"],
                tablefmt=self.format_style,
            )
        )

        # Add code snippets if requested
        if self.show_snippets:
            lines.extend(["", banner, "CODE EVIDENCE", banner, ""])

            for i, finding in enumerate(result.findings, 1):
                lines.append(f"Finding #{i}: {finding.title}")
                # Same formatter as the table above, so a finding without a
                # file path shows "N/A" instead of the literal text "None".
                lines.append(f" Location: {self._format_location(finding)}")
                lines.append(f" Severity: {finding.severity.value}")
                if finding.snippet:
                    lines.append(f" Code: {finding.snippet}")
                if finding.remediation:
                    lines.append(f" Fix: {finding.remediation}")
                lines.append("")

        lines.append("")
        return "\n".join(lines)

    def _generate_multi_skill_report(self, report: Report) -> str:
        """Generate table report for multiple skills."""
        banner = "=" * 80
        lines = [banner, "Claude Skills Security Scan Report", banner, ""]

        # Summary table
        summary_data = [
            ["Total Skills Scanned", report.total_skills_scanned],
            ["Safe Skills", report.safe_count],
            ["Total Findings", report.total_findings],
            ["Critical", report.critical_count],
            ["High", report.high_count],
            ["Medium", report.medium_count],
            ["Low", report.low_count],
            ["Info", report.info_count],
        ]
        lines.append(tabulate(summary_data, tablefmt=self.format_style))
        lines.append("")

        # Skills overview table: one row per scanned skill
        lines.append("Skills Overview:")
        skills_data = [
            [
                result.skill_name,
                "[OK] SAFE" if result.is_safe else "[FAIL] ISSUES",
                result.max_severity.value,
                len(result.findings),
                len(result.get_findings_by_severity(Severity.CRITICAL)),
                len(result.get_findings_by_severity(Severity.HIGH)),
            ]
            for result in report.scan_results
        ]
        lines.append(
            tabulate(
                skills_data,
                headers=["Skill", "Status", "Max Severity", "Total", "Critical", "High"],
                tablefmt=self.format_style,
            )
        )
        lines.append("")

        return "\n".join(lines)

    def save_report(self, data: ScanResult | Report, output_path: str):
        """
        Save table report to file.

        Args:
            data: ScanResult or Report object
            output_path: Path to save file (written as UTF-8, overwriting
                any existing file)
        """
        report_table = self.generate_report(data)
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(report_table)
@@ -0,0 +1,19 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Security rule definitions and pattern matching.
19
+ """
@@ -0,0 +1,165 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Pattern matching utilities for security rules.
19
+ """
20
+
21
+ import re
22
+ from pathlib import Path
23
+ from typing import Any
24
+
25
+ import yaml
26
+
27
+ from ...core.models import Severity, ThreatCategory
28
+
29
+
30
+ class SecurityRule:
31
+ """Represents a security detection rule."""
32
+
33
+ def __init__(self, rule_data: dict[str, Any]):
34
+ self.id = rule_data["id"]
35
+ self.category = ThreatCategory(rule_data["category"])
36
+ self.severity = Severity(rule_data["severity"])
37
+ self.patterns = rule_data["patterns"]
38
+ self.exclude_patterns = rule_data.get("exclude_patterns", [])
39
+ self.file_types = rule_data.get("file_types", [])
40
+ self.description = rule_data["description"]
41
+ self.remediation = rule_data.get("remediation", "")
42
+
43
+ # Compile regex patterns
44
+ self.compiled_patterns = []
45
+ for pattern in self.patterns:
46
+ try:
47
+ self.compiled_patterns.append(re.compile(pattern))
48
+ except re.error as e:
49
+ print(f"Warning: Failed to compile pattern '{pattern}' for rule {self.id}: {e}")
50
+
51
+ # Compile exclude patterns
52
+ self.compiled_exclude_patterns = []
53
+ for pattern in self.exclude_patterns:
54
+ try:
55
+ self.compiled_exclude_patterns.append(re.compile(pattern))
56
+ except re.error as e:
57
+ print(f"Warning: Failed to compile exclude pattern '{pattern}' for rule {self.id}: {e}")
58
+
59
+ def matches_file_type(self, file_type: str) -> bool:
60
+ """Check if this rule applies to the given file type."""
61
+ if not self.file_types:
62
+ return True # Rule applies to all file types
63
+ return file_type in self.file_types
64
+
65
+ def scan_content(self, content: str, file_path: str | None = None) -> list[dict[str, Any]]:
66
+ """
67
+ Scan content for rule violations.
68
+
69
+ Returns:
70
+ List of matches with line numbers and snippets
71
+ """
72
+ matches = []
73
+ lines = content.split("\n")
74
+
75
+ for line_num, line in enumerate(lines, start=1):
76
+ # Check exclude patterns first
77
+ excluded = False
78
+ for exclude_pattern in self.compiled_exclude_patterns:
79
+ if exclude_pattern.search(line):
80
+ excluded = True
81
+ break
82
+
83
+ if excluded:
84
+ continue
85
+
86
+ for pattern in self.compiled_patterns:
87
+ match = pattern.search(line)
88
+ if match:
89
+ matches.append(
90
+ {
91
+ "line_number": line_num,
92
+ "line_content": line.strip(),
93
+ "matched_pattern": pattern.pattern,
94
+ "matched_text": match.group(0),
95
+ "file_path": file_path,
96
+ }
97
+ )
98
+
99
+ return matches
100
+
101
+
102
class RuleLoader:
    """Loads security rules from YAML files.

    After ``load_rules()`` the rules are available as a flat list
    (``rules``), by ID (``rules_by_id``) and grouped by threat category
    (``rules_by_category``).
    """

    def __init__(self, rules_file: Path | None = None):
        """
        Initialize rule loader.

        Args:
            rules_file: Path to rules YAML file. If None, uses default.
        """
        if rules_file is None:
            # Default to signatures.yaml in data/rules directory
            from ...data import DATA_DIR

            rules_file = DATA_DIR / "rules" / "signatures.yaml"

        self.rules_file = rules_file
        self.rules: list[SecurityRule] = []
        self.rules_by_id: dict[str, SecurityRule] = {}
        self.rules_by_category: dict[ThreatCategory, list[SecurityRule]] = {}

    def load_rules(self) -> list[SecurityRule]:
        """
        Load rules from YAML file, replacing any previously loaded rules.

        Individual rules that fail to load are reported with a warning and
        skipped; the remaining rules are still returned.

        Returns:
            List of SecurityRule objects

        Raises:
            RuntimeError: If the file cannot be read or parsed, or if its
                top level is not a list of rule definitions.
        """
        try:
            with open(self.rules_file, encoding="utf-8") as f:
                rules_data = yaml.safe_load(f)
        except Exception as e:
            # Chain the cause so the original traceback is not lost
            raise RuntimeError(f"Failed to load rules from {self.rules_file}: {e}") from e

        self.rules = []
        self.rules_by_id = {}
        self.rules_by_category = {}

        # An empty YAML document parses to None: treat it as "no rules"
        if rules_data is None:
            return self.rules

        if not isinstance(rules_data, list):
            raise RuntimeError(
                f"Failed to load rules from {self.rules_file}: "
                f"expected a list of rules, got {type(rules_data).__name__}"
            )

        for rule_data in rules_data:
            try:
                rule = SecurityRule(rule_data)
                self.rules.append(rule)
                self.rules_by_id[rule.id] = rule

                # Group by category
                self.rules_by_category.setdefault(rule.category, []).append(rule)
            except Exception as e:
                # A malformed entry may not be a mapping at all, so do not
                # assume it has .get() when building the warning.
                rule_id = rule_data.get("id", "unknown") if isinstance(rule_data, dict) else "unknown"
                print(f"Warning: Failed to load rule {rule_id}: {e}")

        return self.rules

    def get_rule(self, rule_id: str) -> SecurityRule | None:
        """Get a specific rule by ID, or None if no such rule is loaded."""
        return self.rules_by_id.get(rule_id)

    def get_rules_for_file_type(self, file_type: str) -> list[SecurityRule]:
        """Get all rules that apply to a specific file type."""
        return [rule for rule in self.rules if rule.matches_file_type(file_type)]

    def get_rules_for_category(self, category: ThreatCategory) -> list[SecurityRule]:
        """Get all rules in a specific threat category (empty list if none)."""
        return self.rules_by_category.get(category, [])
@@ -0,0 +1,157 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ YARA rule scanner for detecting malicious patterns in Claude Skills.
19
+ """
20
+
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ import yara
25
+
26
+
27
+ class YaraScanner:
28
+ """Scanner that uses YARA rules to detect malicious patterns."""
29
+
30
+ def __init__(self, rules_dir: Path | None = None):
31
+ """
32
+ Initialize YARA scanner.
33
+
34
+ Args:
35
+ rules_dir: Path to directory containing .yara files
36
+ """
37
+ if rules_dir is None:
38
+ # Default to yara_rules directory
39
+ from ...data import YARA_RULES_DIR
40
+
41
+ rules_dir = YARA_RULES_DIR
42
+
43
+ self.rules_dir = Path(rules_dir)
44
+ self.rules = None
45
+ self._load_rules()
46
+
47
+ def _load_rules(self):
48
+ """Load all YARA rules from directory."""
49
+ if not self.rules_dir.exists():
50
+ raise FileNotFoundError(f"YARA rules directory not found: {self.rules_dir}")
51
+
52
+ # Find all .yara files
53
+ yara_files = list(self.rules_dir.glob("*.yara"))
54
+ if not yara_files:
55
+ raise FileNotFoundError(f"No .yara files found in {self.rules_dir}")
56
+
57
+ # Compile all rules
58
+ rules_dict = {}
59
+ for yara_file in yara_files:
60
+ namespace = yara_file.stem # Use filename as namespace
61
+ rules_dict[namespace] = str(yara_file)
62
+
63
+ try:
64
+ self.rules = yara.compile(filepaths=rules_dict)
65
+ except yara.SyntaxError as e:
66
+ raise RuntimeError(f"Failed to compile YARA rules: {e}")
67
+
68
+ def scan_content(self, content: str, file_path: str | None = None) -> list[dict[str, Any]]:
69
+ """
70
+ Scan content with YARA rules.
71
+
72
+ Args:
73
+ content: Text content to scan
74
+ file_path: Optional file path for context
75
+
76
+ Returns:
77
+ List of matches with metadata
78
+ """
79
+ if not self.rules:
80
+ return []
81
+
82
+ matches = []
83
+
84
+ try:
85
+ yara_matches = self.rules.match(data=content)
86
+
87
+ for match in yara_matches:
88
+ # Extract metadata from the rule
89
+ meta = {
90
+ "rule_name": match.rule,
91
+ "namespace": match.namespace,
92
+ "tags": match.tags,
93
+ "meta": match.meta,
94
+ }
95
+
96
+ # Find which strings matched and their locations
97
+ matched_strings = []
98
+ for string in match.strings:
99
+ for instance in string.instances:
100
+ # Find line number for this match
101
+ line_num = content[: instance.offset].count("\n") + 1
102
+ line_start = content.rfind("\n", 0, instance.offset) + 1
103
+ line_end = content.find("\n", instance.offset)
104
+ if line_end == -1:
105
+ line_end = len(content)
106
+ line_content = content[line_start:line_end].strip()
107
+
108
+ matched_strings.append(
109
+ {
110
+ "identifier": string.identifier,
111
+ "offset": instance.offset,
112
+ "matched_data": instance.matched_data.decode("utf-8", errors="ignore"),
113
+ "line_number": line_num,
114
+ "line_content": line_content,
115
+ }
116
+ )
117
+
118
+ matches.append(
119
+ {
120
+ "rule_name": match.rule,
121
+ "namespace": match.namespace,
122
+ "file_path": file_path,
123
+ "meta": meta,
124
+ "strings": matched_strings,
125
+ }
126
+ )
127
+
128
+ except yara.Error as e:
129
+ print(f"Warning: YARA scanning error: {e}")
130
+
131
+ return matches
132
+
133
+ def scan_file(self, file_path: Path) -> list[dict[str, Any]]:
134
+ """
135
+ Scan a file with YARA rules.
136
+
137
+ Args:
138
+ file_path: Path to file to scan
139
+
140
+ Returns:
141
+ List of matches
142
+ """
143
+ try:
144
+ with open(file_path, encoding="utf-8") as f:
145
+ content = f.read()
146
+ return self.scan_content(content, str(file_path))
147
+ except (OSError, UnicodeDecodeError) as e:
148
+ print(f"Warning: Could not read file {file_path}: {e}")
149
+ return []
150
+
151
+ def get_loaded_rules(self) -> list[str]:
152
+ """Get list of loaded rule names."""
153
+ if not self.rules:
154
+ return []
155
+ # YARA doesn't provide easy access to rule names, return namespaces
156
+ yara_files = list(self.rules_dir.glob("*.yara"))
157
+ return [f.stem for f in yara_files]