cisco-ai-skill-scanner 1.0.0 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,341 @@
+ # Copyright 2026 Cisco Systems, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ """
+ Trigger analyzer for detecting overly generic skill descriptions.
+
+ Claude uses skill descriptions to decide when to activate a skill.
+ Overly generic descriptions can cause trigger hijacking where a skill
+ activates for unrelated user requests.
+ """
+
+ import re
+
+ from ..models import Finding, Severity, Skill, ThreatCategory
+ from .base import BaseAnalyzer
+
+
+ class TriggerAnalyzer(BaseAnalyzer):
+     """Analyzes skill descriptions for trigger specificity issues."""
+
+     # Generic patterns that are too broad for skill descriptions
+     # Only match truly generic descriptions that could hijack any query
+     # "Toolkit for X" is specific if X is specific, so we don't flag it
+     GENERIC_PATTERNS = [
+         r"^help\s*(me|you|with\s+anything)?\s*$",  # Just "help" or "help me"
+         r"^(a|an|the)?\s*assistant\s*$",  # Just "assistant" with no context
+         r"^(a|an|the)?\s*helper\s*$",  # Just "helper" with no context
+         r"^(I |this )?(can |will )?do\s+(anything|everything)\s*(for you)?\.?$",
+         r"^general\s+purpose\s+(assistant|tool|skill)\s*$",
+         r"^universal\s+(assistant|tool|skill)\s*$",
+         r"^default\s+(assistant|tool|skill)\s*$",
+         r"^use\s+(this|me)\s+for\s+(everything|anything)\s*$",
+     ]
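+     # Illustrative (editor's note, not part of the packaged rules): "help me"
+     # and "general purpose assistant" match the patterns above, while
+     # "Convert CSV files to JSON" matches none of them.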
+
+     # Vague/generic words that shouldn't dominate a description
+     GENERIC_WORDS = {
+         "help",
+         "helper",
+         "helps",
+         "helping",
+         "assist",
+         "assistant",
+         "assists",
+         "assisting",
+         "do",
+         "does",
+         "doing",
+         "thing",
+         "things",
+         "stuff",
+         "general",
+         "generic",
+         "universal",
+         "any",
+         "anything",
+         "everything",
+         "something",
+         "all",
+         "various",
+         "multiple",
+         "many",
+         "useful",
+         "handy",
+         "convenient",
+         "tool",
+         "utility",
+     }
+
+     # Words that indicate specificity (good)
+     SPECIFIC_INDICATORS = {
+         # Actions
+         "convert",
+         "parse",
+         "format",
+         "validate",
+         "generate",
+         "analyze",
+         "create",
+         "build",
+         "compile",
+         "transform",
+         "extract",
+         "process",
+         "calculate",
+         "compute",
+         "summarize",
+         "translate",
+         "encode",
+         "decode",
+         # Domains
+         "json",
+         "yaml",
+         "xml",
+         "csv",
+         "markdown",
+         "html",
+         "css",
+         "sql",
+         "python",
+         "javascript",
+         "typescript",
+         "rust",
+         "go",
+         "java",
+         "api",
+         "database",
+         "file",
+         "image",
+         "pdf",
+         "document",
+         "git",
+         "docker",
+         "kubernetes",
+         "aws",
+         "azure",
+         "gcp",
+         # Specific nouns
+         "code",
+         "test",
+         "documentation",
+         "report",
+         "log",
+         "config",
+         "user",
+         "data",
+         "request",
+         "response",
+         "error",
+         "exception",
+     }
+
+     def __init__(self):
+         """Initialize trigger analyzer."""
+         super().__init__("trigger_analyzer")
+         self._compiled_patterns = [re.compile(p, re.IGNORECASE) for p in self.GENERIC_PATTERNS]
+
+     def analyze(self, skill: Skill) -> list[Finding]:
+         """
+         Analyze skill for trigger specificity issues.
+
+         Args:
+             skill: Skill to analyze
+
+         Returns:
+             List of findings related to trigger issues
+         """
+         findings = []
+
+         # Check for generic patterns in description
+         findings.extend(self._check_generic_patterns(skill))
+
+         # Check description word count and specificity
+         findings.extend(self._check_description_specificity(skill))
+
+         # Check for keyword baiting (SEO-style stuffing)
+         findings.extend(self._check_keyword_baiting(skill))
+
+         return findings
+
+     def _check_generic_patterns(self, skill: Skill) -> list[Finding]:
+         """Check if description matches known generic patterns."""
+         findings = []
+         description = skill.description.strip()
+
+         for pattern in self._compiled_patterns:
+             if pattern.match(description):
+                 findings.append(
+                     Finding(
+                         id=f"TRIGGER_GENERIC_{hash(description) & 0xFFFFFFFF:08x}",
+                         rule_id="TRIGGER_OVERLY_GENERIC",
+                         category=ThreatCategory.SOCIAL_ENGINEERING,
+                         severity=Severity.MEDIUM,
+                         title="Skill description is overly generic",
+                         description=(
+                             f"Description '{description[:50]}...' matches a generic pattern. "
+                             f"This may cause the skill to trigger for unrelated user requests, "
+                             f"potentially hijacking conversations."
+                         ),
+                         file_path="SKILL.md",
+                         remediation=(
+                             "Make the description more specific by describing exactly what the skill does, "
+                             "what inputs it accepts, and what outputs it produces."
+                         ),
+                         analyzer="trigger",
+                     )
+                 )
+                 break  # One finding per skill is enough
+
+         return findings
+
+     def _check_description_specificity(self, skill: Skill) -> list[Finding]:
+         """Check if description has sufficient specificity."""
+         findings = []
+         description = skill.description.strip()
+
+         # Tokenize description
+         words = re.findall(r"\b[a-zA-Z]+\b", description.lower())
+
+         # Check word count
+         if len(words) < 5:
+             findings.append(
+                 Finding(
+                     id=f"TRIGGER_SHORT_{hash(description) & 0xFFFFFFFF:08x}",
+                     rule_id="TRIGGER_DESCRIPTION_TOO_SHORT",
+                     category=ThreatCategory.SOCIAL_ENGINEERING,
+                     severity=Severity.LOW,
+                     title="Skill description is too short",
+                     description=(
+                         f"Description has only {len(words)} words. "
+                         f"Short descriptions may not provide enough context for Claude to determine "
+                         f"when this skill should be used."
+                     ),
+                     file_path="SKILL.md",
+                     remediation=(
+                         "Expand the description to at least 10-20 words explaining the skill's "
+                         "purpose, capabilities, and appropriate use cases."
+                     ),
+                     analyzer="trigger",
+                 )
+             )
+             return findings  # Don't check further for very short descriptions
+
+         # Calculate specificity ratio
+         generic_count = sum(1 for w in words if w in self.GENERIC_WORDS)
+         specific_count = sum(1 for w in words if w in self.SPECIFIC_INDICATORS)
+
+         generic_ratio = generic_count / len(words) if words else 0
+
+         # If more than 40% of words are generic, flag it
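+         # Illustrative (editor's note): for "helps with various things and
+         # stuff", 4 of the 6 words are in GENERIC_WORDS (~67%) and none are
+         # specific indicators, so the description is flagged.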
+         if generic_ratio > 0.4 and specific_count < 2:
+             findings.append(
+                 Finding(
+                     id=f"TRIGGER_VAGUE_{hash(description) & 0xFFFFFFFF:08x}",
+                     rule_id="TRIGGER_VAGUE_DESCRIPTION",
+                     category=ThreatCategory.SOCIAL_ENGINEERING,
+                     severity=Severity.LOW,
+                     title="Skill description lacks specificity",
+                     description=(
+                         f"Description contains {generic_count} generic words ({generic_ratio:.0%}) "
+                         f"and only {specific_count} specific indicators. "
+                         f"This may cause imprecise skill matching."
+                     ),
+                     file_path="SKILL.md",
+                     remediation=(
+                         "Replace generic terms with specific technical terms that describe "
+                         "exactly what file types, technologies, or operations this skill handles."
+                     ),
+                     analyzer="trigger",
+                 )
+             )
+
+         return findings
+
+     def _check_keyword_baiting(self, skill: Skill) -> list[Finding]:
+         """Check for keyword stuffing / SEO-style baiting."""
+         findings = []
+         description = skill.description.strip()
+
+         # Look for comma-separated lists of 8+ keywords (not just 5)
+         # Also require the list to be at the START of description (SEO style)
+         # or contain repeated/similar words
+         keyword_lists = re.findall(r"[a-zA-Z]+(?:\s*,\s*[a-zA-Z]+){7,}", description)
+
+         # Only flag if the list is suspiciously long AND at the start
+         # OR contains repetitive patterns
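+         # Illustrative (editor's note): a description opening with
+         # "json, yaml, xml, csv, html, css, sql, pdf, git" (eight or more
+         # comma-separated terms at the very start) would be treated as a
+         # baiting candidate by the checks below.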
+         if keyword_lists:
+             # Check if this is a legitimate "examples include" list
+             context_before = description[: description.find(keyword_lists[0])].lower()
+             if "example" in context_before or "such as" in context_before or "including" in context_before:
+                 # This is likely a legitimate examples list, not keyword baiting
+                 return findings
+
+             # Check for repetitive words in the list
+             words = [w.strip().lower() for w in keyword_lists[0].split(",")]
+             unique_ratio = len(set(words)) / len(words) if words else 1
+
+             # Only flag if many repeated words (ratio < 0.7) or list is at very start
+             if unique_ratio < 0.7 or description.strip().startswith(keyword_lists[0][:20]):
+                 findings.append(
+                     Finding(
+                         id=f"TRIGGER_KEYWORD_BAIT_{hash(description) & 0xFFFFFFFF:08x}",
+                         rule_id="TRIGGER_KEYWORD_BAITING",
+                         category=ThreatCategory.SOCIAL_ENGINEERING,
+                         severity=Severity.MEDIUM,
+                         title="Skill description may contain keyword baiting",
+                         description=(
+                             "Description contains suspiciously long keyword list "
+                             "that may be an attempt to trigger the skill for many unrelated queries."
+                         ),
+                         file_path="SKILL.md",
+                         remediation=(
+                             "Replace keyword lists with natural language sentences that describe "
+                             "the skill's actual capabilities."
+                         ),
+                         analyzer="trigger",
+                     )
+                 )
+
+         return findings
+
+     def get_specificity_score(self, description: str) -> float:
+         """
+         Calculate a specificity score for a description.
+
+         Args:
+             description: The skill description text
+
+         Returns:
+             Score from 0.0 (very generic) to 1.0 (very specific)
+         """
+         words = re.findall(r"\b[a-zA-Z]+\b", description.lower())
+         if not words:
+             return 0.0
+
+         generic_count = sum(1 for w in words if w in self.GENERIC_WORDS)
+         specific_count = sum(1 for w in words if w in self.SPECIFIC_INDICATORS)
+
+         # Base score from word count (more words = more specific, up to a point)
+         word_score = min(len(words) / 20, 1.0)
+
+         # Penalty for generic words
+         generic_penalty = generic_count / len(words) if words else 0
+
+         # Bonus for specific words
+         specific_bonus = min(specific_count / 5, 0.5)
+
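+         # Illustrative (editor's note): "Convert CSV files to JSON" gives
+         # word_score 0.25, generic_penalty 0.0, and specific_bonus 0.5,
+         # for a final score of 0.75.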
+         score = word_score - generic_penalty + specific_bonus
+         return max(0.0, min(1.0, score))
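
A minimal usage sketch (an editor's illustration, not part of the packaged file): it assumes the import path implied by the file listing above and that BaseAnalyzer needs no setup beyond the analyzer name passed in __init__; the example descriptions are hypothetical.

    from skillanalyzer.core.analyzers.trigger_analyzer import TriggerAnalyzer

    analyzer = TriggerAnalyzer()

    # get_specificity_score() takes raw description text and returns a value
    # between 0.0 (very generic) and 1.0 (very specific).
    print(analyzer.get_specificity_score("help me"))                    # clamps to 0.0
    print(analyzer.get_specificity_score("Convert CSV files to JSON"))  # 0.75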