cisco-ai-skill-scanner 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. cisco_ai_skill_scanner-1.0.0.dist-info/METADATA +253 -0
  2. cisco_ai_skill_scanner-1.0.0.dist-info/RECORD +100 -0
  3. cisco_ai_skill_scanner-1.0.0.dist-info/WHEEL +4 -0
  4. cisco_ai_skill_scanner-1.0.0.dist-info/entry_points.txt +4 -0
  5. cisco_ai_skill_scanner-1.0.0.dist-info/licenses/LICENSE +17 -0
  6. skillanalyzer/__init__.py +45 -0
  7. skillanalyzer/_version.py +34 -0
  8. skillanalyzer/api/__init__.py +25 -0
  9. skillanalyzer/api/api.py +34 -0
  10. skillanalyzer/api/api_cli.py +78 -0
  11. skillanalyzer/api/api_server.py +634 -0
  12. skillanalyzer/api/router.py +527 -0
  13. skillanalyzer/cli/__init__.py +25 -0
  14. skillanalyzer/cli/cli.py +816 -0
  15. skillanalyzer/config/__init__.py +26 -0
  16. skillanalyzer/config/config.py +149 -0
  17. skillanalyzer/config/config_parser.py +122 -0
  18. skillanalyzer/config/constants.py +85 -0
  19. skillanalyzer/core/__init__.py +24 -0
  20. skillanalyzer/core/analyzers/__init__.py +75 -0
  21. skillanalyzer/core/analyzers/aidefense_analyzer.py +872 -0
  22. skillanalyzer/core/analyzers/base.py +53 -0
  23. skillanalyzer/core/analyzers/behavioral/__init__.py +30 -0
  24. skillanalyzer/core/analyzers/behavioral/alignment/__init__.py +45 -0
  25. skillanalyzer/core/analyzers/behavioral/alignment/alignment_llm_client.py +240 -0
  26. skillanalyzer/core/analyzers/behavioral/alignment/alignment_orchestrator.py +216 -0
  27. skillanalyzer/core/analyzers/behavioral/alignment/alignment_prompt_builder.py +422 -0
  28. skillanalyzer/core/analyzers/behavioral/alignment/alignment_response_validator.py +136 -0
  29. skillanalyzer/core/analyzers/behavioral/alignment/threat_vulnerability_classifier.py +198 -0
  30. skillanalyzer/core/analyzers/behavioral_analyzer.py +453 -0
  31. skillanalyzer/core/analyzers/cross_skill_analyzer.py +490 -0
  32. skillanalyzer/core/analyzers/llm_analyzer.py +440 -0
  33. skillanalyzer/core/analyzers/llm_prompt_builder.py +270 -0
  34. skillanalyzer/core/analyzers/llm_provider_config.py +215 -0
  35. skillanalyzer/core/analyzers/llm_request_handler.py +284 -0
  36. skillanalyzer/core/analyzers/llm_response_parser.py +81 -0
  37. skillanalyzer/core/analyzers/meta_analyzer.py +845 -0
  38. skillanalyzer/core/analyzers/static.py +1105 -0
  39. skillanalyzer/core/analyzers/trigger_analyzer.py +341 -0
  40. skillanalyzer/core/analyzers/virustotal_analyzer.py +463 -0
  41. skillanalyzer/core/exceptions.py +77 -0
  42. skillanalyzer/core/loader.py +377 -0
  43. skillanalyzer/core/models.py +300 -0
  44. skillanalyzer/core/reporters/__init__.py +26 -0
  45. skillanalyzer/core/reporters/json_reporter.py +65 -0
  46. skillanalyzer/core/reporters/markdown_reporter.py +209 -0
  47. skillanalyzer/core/reporters/sarif_reporter.py +246 -0
  48. skillanalyzer/core/reporters/table_reporter.py +195 -0
  49. skillanalyzer/core/rules/__init__.py +19 -0
  50. skillanalyzer/core/rules/patterns.py +165 -0
  51. skillanalyzer/core/rules/yara_scanner.py +157 -0
  52. skillanalyzer/core/scanner.py +437 -0
  53. skillanalyzer/core/static_analysis/__init__.py +27 -0
  54. skillanalyzer/core/static_analysis/cfg/__init__.py +21 -0
  55. skillanalyzer/core/static_analysis/cfg/builder.py +439 -0
  56. skillanalyzer/core/static_analysis/context_extractor.py +742 -0
  57. skillanalyzer/core/static_analysis/dataflow/__init__.py +25 -0
  58. skillanalyzer/core/static_analysis/dataflow/forward_analysis.py +715 -0
  59. skillanalyzer/core/static_analysis/interprocedural/__init__.py +21 -0
  60. skillanalyzer/core/static_analysis/interprocedural/call_graph_analyzer.py +406 -0
  61. skillanalyzer/core/static_analysis/interprocedural/cross_file_analyzer.py +190 -0
  62. skillanalyzer/core/static_analysis/parser/__init__.py +21 -0
  63. skillanalyzer/core/static_analysis/parser/python_parser.py +380 -0
  64. skillanalyzer/core/static_analysis/semantic/__init__.py +28 -0
  65. skillanalyzer/core/static_analysis/semantic/name_resolver.py +206 -0
  66. skillanalyzer/core/static_analysis/semantic/type_analyzer.py +200 -0
  67. skillanalyzer/core/static_analysis/taint/__init__.py +21 -0
  68. skillanalyzer/core/static_analysis/taint/tracker.py +252 -0
  69. skillanalyzer/core/static_analysis/types/__init__.py +36 -0
  70. skillanalyzer/data/__init__.py +30 -0
  71. skillanalyzer/data/prompts/boilerplate_protection_rule_prompt.md +26 -0
  72. skillanalyzer/data/prompts/code_alignment_threat_analysis_prompt.md +901 -0
  73. skillanalyzer/data/prompts/llm_response_schema.json +71 -0
  74. skillanalyzer/data/prompts/skill_meta_analysis_prompt.md +303 -0
  75. skillanalyzer/data/prompts/skill_threat_analysis_prompt.md +263 -0
  76. skillanalyzer/data/prompts/unified_response_schema.md +97 -0
  77. skillanalyzer/data/rules/signatures.yaml +440 -0
  78. skillanalyzer/data/yara_rules/autonomy_abuse.yara +66 -0
  79. skillanalyzer/data/yara_rules/code_execution.yara +61 -0
  80. skillanalyzer/data/yara_rules/coercive_injection.yara +115 -0
  81. skillanalyzer/data/yara_rules/command_injection.yara +54 -0
  82. skillanalyzer/data/yara_rules/credential_harvesting.yara +115 -0
  83. skillanalyzer/data/yara_rules/prompt_injection.yara +71 -0
  84. skillanalyzer/data/yara_rules/script_injection.yara +83 -0
  85. skillanalyzer/data/yara_rules/skill_discovery_abuse.yara +57 -0
  86. skillanalyzer/data/yara_rules/sql_injection.yara +73 -0
  87. skillanalyzer/data/yara_rules/system_manipulation.yara +65 -0
  88. skillanalyzer/data/yara_rules/tool_chaining_abuse.yara +60 -0
  89. skillanalyzer/data/yara_rules/transitive_trust_abuse.yara +73 -0
  90. skillanalyzer/data/yara_rules/unicode_steganography.yara +65 -0
  91. skillanalyzer/hooks/__init__.py +21 -0
  92. skillanalyzer/hooks/pre_commit.py +450 -0
  93. skillanalyzer/threats/__init__.py +25 -0
  94. skillanalyzer/threats/threats.py +480 -0
  95. skillanalyzer/utils/__init__.py +28 -0
  96. skillanalyzer/utils/command_utils.py +129 -0
  97. skillanalyzer/utils/di_container.py +154 -0
  98. skillanalyzer/utils/file_utils.py +86 -0
  99. skillanalyzer/utils/logging_config.py +96 -0
  100. skillanalyzer/utils/logging_utils.py +71 -0
@@ -0,0 +1,490 @@
1
+ # Copyright 2026 Cisco Systems, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ # SPDX-License-Identifier: Apache-2.0
16
+
17
+ """
18
+ Cross-skill analyzer for detecting coordinated attacks across multiple skills.
19
+
20
+ This analyzer looks for patterns that suggest multiple skills are working together
21
+ to perform malicious activities, such as:
22
+ - Data relay patterns (one skill collects data, another exfiltrates)
23
+ - Shared external URLs across skills
24
+ - Complementary trigger descriptions
+ - Shared suspicious code patterns (e.g. the same obfuscation or encoding calls)
25
+ """
26
+
27
+ import re
28
+
29
+ from ..models import Finding, Severity, Skill, ThreatCategory
30
+ from .base import BaseAnalyzer
31
+
32
+
33
+ class CrossSkillAnalyzer(BaseAnalyzer):
34
+ """
35
+ Analyzes multiple skills together to detect coordinated attack patterns.
36
+
37
+ This analyzer is designed to be run on a collection of skills rather than
38
+ a single skill, looking for suspicious patterns that only emerge when
39
+ analyzing skills in relation to each other.
40
+ """
41
+
42
def __init__(self):
    """Create the analyzer, registered with the base class as "cross_skill_analyzer"."""
    super().__init__("cross_skill_analyzer")
    # Skill set under analysis; assigned by analyze_skill_set() and read by
    # the individual _detect_* methods.
    self._skills: list[Skill] = list()
46
+
47
def analyze(self, skill: "Skill") -> "list[Finding]":
    """
    Single-skill entry point; intentionally produces nothing.

    Cross-skill findings only exist for a set of skills, so this always
    returns an empty list. Use analyze_skill_set() to run the detections.

    Args:
        skill: Ignored.

    Returns:
        An empty list.
    """
    no_findings: list = []
    return no_findings
55
+
56
def analyze_skill_set(self, skills: "list[Skill]") -> "list[Finding]":
    """
    Run every cross-skill detection over a collection of skills.

    Args:
        skills: Skills to examine together.

    Returns:
        Findings from all detections, concatenated in the order the
        detections run. Empty when fewer than two skills are supplied.
    """
    # A cross-skill pattern needs at least a pair of skills.
    if len(skills) < 2:
        return []

    self._skills = skills

    # Order matters: it determines the order of the combined findings.
    detections = (
        self._detect_data_relay_pattern,  # 1: data relay patterns
        self._detect_shared_external_urls,  # 2: shared external URLs
        self._detect_complementary_triggers,  # 3: complementary triggers
        self._detect_shared_suspicious_patterns,  # 4: shared suspicious code
    )

    combined = []
    for run_detection in detections:
        combined.extend(run_detection())
    return combined
85
+
86
+ def _detect_data_relay_pattern(self) -> list[Finding]:
87
+ """
88
+ Detect data relay patterns where one skill collects data and another exfiltrates.
89
+
90
+ Pattern: Skill A reads credentials/sensitive data, Skill B sends to network.
91
+ """
92
+ findings = []
93
+
94
+ # Categorize skills by behavior
95
+ collectors: list[tuple[Skill, set[str]]] = [] # Skills that read sensitive data
96
+ exfiltrators: list[tuple[Skill, set[str]]] = [] # Skills with network output
97
+
98
+ # Patterns that indicate data collection
99
+ COLLECTION_PATTERNS = [
100
+ r"credential",
101
+ r"password",
102
+ r"secret",
103
+ r"api[_-]?key",
104
+ r"token",
105
+ r"\.env",
106
+ r"config",
107
+ r"ssh",
108
+ r"private",
109
+ r"\.pem",
110
+ r"~/.ssh",
111
+ r"/etc/passwd",
112
+ r"/etc/shadow",
113
+ r"keychain",
114
+ r"wallet",
115
+ r"cookie",
116
+ ]
117
+
118
+ # Patterns that indicate network exfiltration
119
+ EXFIL_PATTERNS = [
120
+ r"requests\.(post|put)",
121
+ r"urllib\.request",
122
+ r"httpx\.(post|put)",
123
+ r"socket\.send",
124
+ r"aiohttp.*post",
125
+ r"webhook",
126
+ r"discord\.com/api/webhooks",
127
+ r"ngrok",
128
+ r"localhost\.run",
129
+ ]
130
+
131
+ for skill in self._skills:
132
+ skill_content = self._get_skill_content(skill)
133
+
134
+ # Check for collection patterns
135
+ collection_hits = set()
136
+ for pattern in COLLECTION_PATTERNS:
137
+ if re.search(pattern, skill_content, re.IGNORECASE):
138
+ collection_hits.add(pattern)
139
+ if collection_hits:
140
+ collectors.append((skill, collection_hits))
141
+
142
+ # Check for exfiltration patterns
143
+ exfil_hits = set()
144
+ for pattern in EXFIL_PATTERNS:
145
+ if re.search(pattern, skill_content, re.IGNORECASE):
146
+ exfil_hits.add(pattern)
147
+ if exfil_hits:
148
+ exfiltrators.append((skill, exfil_hits))
149
+
150
+ # Flag if we have both collectors and exfiltrators
151
+ if collectors and exfiltrators:
152
+ collector_names = [s.name for s, _ in collectors]
153
+ exfil_names = [s.name for s, _ in exfiltrators]
154
+
155
+ # Only flag if they are different skills
156
+ if set(collector_names) != set(exfil_names):
157
+ findings.append(
158
+ Finding(
159
+ id=f"CROSS_SKILL_RELAY_{hash(tuple(collector_names + exfil_names)) & 0xFFFFFFFF:08x}",
160
+ rule_id="CROSS_SKILL_DATA_RELAY",
161
+ category=ThreatCategory.DATA_EXFILTRATION,
162
+ severity=Severity.HIGH,
163
+ title="Potential data relay attack pattern detected",
164
+ description=(
165
+ f"Skills appear to form a data relay chain. "
166
+ f"Collectors ({', '.join(collector_names)}) access sensitive data while "
167
+ f"exfiltrators ({', '.join(exfil_names)}) send data to external destinations. "
168
+ f"This pattern may indicate a coordinated attack."
169
+ ),
170
+ file_path="(cross-skill analysis)",
171
+ remediation=(
172
+ "Review these skills together to ensure they are not collaborating "
173
+ "to exfiltrate sensitive data. Consider disabling one or both skills."
174
+ ),
175
+ analyzer="cross_skill",
176
+ metadata={
177
+ "collectors": collector_names,
178
+ "exfiltrators": exfil_names,
179
+ },
180
+ )
181
+ )
182
+
183
+ return findings
184
+
185
+ def _detect_shared_external_urls(self) -> list[Finding]:
186
+ """
187
+ Detect skills that reference the same external URLs.
188
+
189
+ Multiple skills pointing to the same external resource may indicate
190
+ coordinated command-and-control or exfiltration.
191
+ """
192
+ findings = []
193
+
194
+ # Extract URLs from each skill
195
+ skill_urls: dict[str, list[str]] = {} # URL -> list of skill names
196
+
197
+ for skill in self._skills:
198
+ content = self._get_skill_content(skill)
199
+ urls = self._extract_urls(content)
200
+
201
+ for url in urls:
202
+ # Normalize URL (remove path, keep domain)
203
+ domain = self._extract_domain(url)
204
+ if domain and not self._is_common_domain(domain):
205
+ if domain not in skill_urls:
206
+ skill_urls[domain] = []
207
+ if skill.name not in skill_urls[domain]:
208
+ skill_urls[domain].append(skill.name)
209
+
210
+ # Flag domains referenced by multiple skills
211
+ for domain, skill_names in skill_urls.items():
212
+ if len(skill_names) >= 2:
213
+ findings.append(
214
+ Finding(
215
+ id=f"CROSS_SKILL_URL_{hash(domain) & 0xFFFFFFFF:08x}",
216
+ rule_id="CROSS_SKILL_SHARED_URL",
217
+ category=ThreatCategory.DATA_EXFILTRATION,
218
+ severity=Severity.MEDIUM,
219
+ title="Multiple skills reference the same external domain",
220
+ description=(
221
+ f"Domain '{domain}' is referenced by {len(skill_names)} skills: "
222
+ f"{', '.join(skill_names)}. Multiple skills pointing to the same "
223
+ f"external resource may indicate coordinated C2 or exfiltration."
224
+ ),
225
+ file_path="(cross-skill analysis)",
226
+ remediation=(
227
+ "Review why multiple skills reference this domain and ensure "
228
+ "it is a legitimate, trusted resource."
229
+ ),
230
+ analyzer="cross_skill",
231
+ metadata={
232
+ "domain": domain,
233
+ "skills": skill_names,
234
+ },
235
+ )
236
+ )
237
+
238
+ return findings
239
+
240
+ def _detect_complementary_triggers(self) -> list[Finding]:
241
+ """
242
+ Detect skills with complementary trigger descriptions.
243
+
244
+ Pattern: One skill designed to collect, another to exfiltrate,
245
+ with descriptions that suggest they work together.
246
+ """
247
+ findings = []
248
+
249
+ # Keywords that suggest data collection
250
+ COLLECTION_KEYWORDS = {
251
+ "gather",
252
+ "collect",
253
+ "read",
254
+ "scan",
255
+ "find",
256
+ "search",
257
+ "extract",
258
+ "parse",
259
+ "load",
260
+ "get",
261
+ "fetch",
262
+ "retrieve",
263
+ }
264
+
265
+ # Keywords that suggest data sending
266
+ SENDING_KEYWORDS = {
267
+ "send",
268
+ "upload",
269
+ "post",
270
+ "submit",
271
+ "transfer",
272
+ "sync",
273
+ "backup",
274
+ "export",
275
+ "share",
276
+ "publish",
277
+ "notify",
278
+ }
279
+
280
+ collectors = []
281
+ senders = []
282
+
283
+ for skill in self._skills:
284
+ desc_lower = skill.description.lower()
285
+ desc_words = set(re.findall(r"\b[a-z]+\b", desc_lower))
286
+
287
+ if desc_words & COLLECTION_KEYWORDS:
288
+ collectors.append(skill)
289
+ if desc_words & SENDING_KEYWORDS:
290
+ senders.append(skill)
291
+
292
+ # Flag if we have complementary skills
293
+ if collectors and senders:
294
+ # Check for suspicious combinations
295
+ for collector in collectors:
296
+ for sender in senders:
297
+ if collector.name != sender.name:
298
+ # Check if they might work together
299
+ coll_words = set(re.findall(r"\b[a-z]+\b", collector.description.lower()))
300
+ send_words = set(re.findall(r"\b[a-z]+\b", sender.description.lower()))
301
+
302
+ # Look for shared context words (excluding stop words and action words)
303
+ EXCLUDE_WORDS = (
304
+ COLLECTION_KEYWORDS
305
+ | SENDING_KEYWORDS
306
+ | {
307
+ "the",
308
+ "a",
309
+ "an",
310
+ "is",
311
+ "are",
312
+ "to",
313
+ "for",
314
+ "and",
315
+ "or",
316
+ "in",
317
+ "with",
318
+ }
319
+ )
320
+ shared_context = (coll_words & send_words) - EXCLUDE_WORDS
321
+
322
+ if len(shared_context) >= 2:
323
+ findings.append(
324
+ Finding(
325
+ id=f"CROSS_SKILL_COMPLEMENTARY_{hash(collector.name + sender.name) & 0xFFFFFFFF:08x}",
326
+ rule_id="CROSS_SKILL_COMPLEMENTARY_TRIGGERS",
327
+ category=ThreatCategory.SOCIAL_ENGINEERING,
328
+ severity=Severity.LOW,
329
+ title="Skills have complementary descriptions",
330
+ description=(
331
+ f"Skill '{collector.name}' (collector) and '{sender.name}' (sender) "
332
+ f"have complementary descriptions with shared context: {', '.join(shared_context)}. "
333
+ f"This may be intentional design or could indicate coordinated behavior."
334
+ ),
335
+ file_path="(cross-skill analysis)",
336
+ remediation="Review these skills to ensure they are not designed to work together maliciously",
337
+ analyzer="cross_skill",
338
+ metadata={
339
+ "collector": collector.name,
340
+ "sender": sender.name,
341
+ "shared_context": list(shared_context),
342
+ },
343
+ )
344
+ )
345
+
346
+ return findings
347
+
348
+ def _detect_shared_suspicious_patterns(self) -> list[Finding]:
349
+ """
350
+ Detect skills that share suspicious code patterns.
351
+
352
+ Similar obfuscation or encoding across skills may indicate
353
+ they came from the same malicious source.
354
+ """
355
+ findings = []
356
+
357
+ # Extract suspicious patterns from each skill
358
+ SUSPICIOUS_PATTERNS = [
359
+ (r"base64\.b64decode", "base64_decode"),
360
+ (r"exec\s*\(", "exec_call"),
361
+ (r"eval\s*\(", "eval_call"),
362
+ (r"\\x[0-9a-fA-F]{2}", "hex_escape"),
363
+ (r"chr\([0-9]+\)", "chr_call"),
364
+ (r"getattr\s*\([^)]+,\s*['\"][^'\"]+['\"]\s*\)", "dynamic_getattr"),
365
+ ]
366
+
367
+ skill_patterns: dict[str, list[str]] = {} # pattern -> list of skill names
368
+
369
+ for skill in self._skills:
370
+ content = self._get_skill_content(skill)
371
+
372
+ for pattern, name in SUSPICIOUS_PATTERNS:
373
+ if re.search(pattern, content):
374
+ if name not in skill_patterns:
375
+ skill_patterns[name] = []
376
+ if skill.name not in skill_patterns[name]:
377
+ skill_patterns[name].append(skill.name)
378
+
379
+ # Flag patterns shared by multiple skills
380
+ for pattern_name, skill_names in skill_patterns.items():
381
+ if len(skill_names) >= 2:
382
+ findings.append(
383
+ Finding(
384
+ id=f"CROSS_SKILL_PATTERN_{hash(pattern_name + str(skill_names)) & 0xFFFFFFFF:08x}",
385
+ rule_id="CROSS_SKILL_SHARED_PATTERN",
386
+ category=ThreatCategory.OBFUSCATION,
387
+ severity=Severity.MEDIUM,
388
+ title="Multiple skills share suspicious code pattern",
389
+ description=(
390
+ f"Pattern '{pattern_name}' found in {len(skill_names)} skills: "
391
+ f"{', '.join(skill_names)}. Shared suspicious patterns may indicate "
392
+ f"skills from the same malicious source."
393
+ ),
394
+ file_path="(cross-skill analysis)",
395
+ remediation=(
396
+ "Review these skills carefully - shared obfuscation or encoding "
397
+ "patterns often indicate malicious intent."
398
+ ),
399
+ analyzer="cross_skill",
400
+ metadata={
401
+ "pattern": pattern_name,
402
+ "skills": skill_names,
403
+ },
404
+ )
405
+ )
406
+
407
+ return findings
408
+
409
+ def _get_skill_content(self, skill: Skill) -> str:
410
+ """Get all content from a skill as a single string."""
411
+ content_parts = [skill.description, skill.instruction_body]
412
+
413
+ for skill_file in skill.files:
414
+ try:
415
+ file_content = skill_file.read_content()
416
+ if file_content:
417
+ content_parts.append(file_content)
418
+ except Exception:
419
+ pass
420
+
421
+ return "\n".join(content_parts)
422
+
423
+ def _extract_urls(self, content: str) -> list[str]:
424
+ """Extract URLs from content."""
425
+ url_pattern = r'https?://[^\s<>"\')\]]+[^\s<>"\')\]\.,]'
426
+ return re.findall(url_pattern, content)
427
+
428
+ def _extract_domain(self, url: str) -> str:
429
+ """Extract domain from URL."""
430
+ match = re.match(r"https?://([^/]+)", url)
431
+ if match:
432
+ return match.group(1).lower()
433
+ return ""
434
+
435
+ def _is_common_domain(self, domain: str) -> bool:
436
+ """Check if domain is a common/trusted domain."""
437
+ COMMON_DOMAINS = {
438
+ # Code hosting / package registries
439
+ "github.com",
440
+ "githubusercontent.com",
441
+ "gitlab.com",
442
+ "pypi.org",
443
+ "npmjs.com",
444
+ "python.org",
445
+ "crates.io",
446
+ "rubygems.org",
447
+ "packagist.org",
448
+ # AI providers
449
+ "anthropic.com",
450
+ "openai.com",
451
+ "claude.com",
452
+ # Cloud providers
453
+ "google.com",
454
+ "googleapis.com",
455
+ "microsoft.com",
456
+ "azure.com",
457
+ "amazon.com",
458
+ "amazonaws.com",
459
+ "aws.amazon.com",
460
+ # Documentation / references
461
+ "stackoverflow.com",
462
+ "docs.python.org",
463
+ "developer.mozilla.org",
464
+ "mdn.io",
465
+ # Standards organizations & licensing
466
+ "apache.org",
467
+ "www.apache.org", # Apache license
468
+ "opensource.org", # OSI licenses
469
+ "creativecommons.org", # CC licenses
470
+ "w3.org",
471
+ "www.w3.org", # W3C standards
472
+ "ietf.org", # IETF standards
473
+ # XML/Document standards (used by Office docs)
474
+ "schemas.openxmlformats.org",
475
+ "schemas.microsoft.com",
476
+ "purl.org", # Persistent URLs for standards
477
+ "dublincore.org", # Metadata standard
478
+ "xmlsoft.org", # libxml
479
+ # CDNs (common for web templates)
480
+ "cdnjs.cloudflare.com",
481
+ "cdn.jsdelivr.net",
482
+ "unpkg.com",
483
+ "ajax.googleapis.com",
484
+ }
485
+
486
+ # Check if domain or parent domain is common
487
+ for common in COMMON_DOMAINS:
488
+ if domain == common or domain.endswith("." + common):
489
+ return True
490
+ return False