classifyre-cli 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. classifyre_cli-0.4.2.dist-info/METADATA +167 -0
  2. classifyre_cli-0.4.2.dist-info/RECORD +101 -0
  3. classifyre_cli-0.4.2.dist-info/WHEEL +4 -0
  4. classifyre_cli-0.4.2.dist-info/entry_points.txt +2 -0
  5. src/__init__.py +1 -0
  6. src/detectors/__init__.py +105 -0
  7. src/detectors/base.py +97 -0
  8. src/detectors/broken_links/__init__.py +3 -0
  9. src/detectors/broken_links/detector.py +280 -0
  10. src/detectors/config.py +59 -0
  11. src/detectors/content/__init__.py +0 -0
  12. src/detectors/custom/__init__.py +13 -0
  13. src/detectors/custom/detector.py +45 -0
  14. src/detectors/custom/runners/__init__.py +56 -0
  15. src/detectors/custom/runners/_base.py +177 -0
  16. src/detectors/custom/runners/_factory.py +51 -0
  17. src/detectors/custom/runners/_feature_extraction.py +138 -0
  18. src/detectors/custom/runners/_gliner2.py +324 -0
  19. src/detectors/custom/runners/_image_classification.py +98 -0
  20. src/detectors/custom/runners/_llm.py +22 -0
  21. src/detectors/custom/runners/_object_detection.py +107 -0
  22. src/detectors/custom/runners/_regex.py +147 -0
  23. src/detectors/custom/runners/_text_classification.py +109 -0
  24. src/detectors/custom/trainer.py +293 -0
  25. src/detectors/dependencies.py +109 -0
  26. src/detectors/pii/__init__.py +0 -0
  27. src/detectors/pii/detector.py +883 -0
  28. src/detectors/secrets/__init__.py +0 -0
  29. src/detectors/secrets/detector.py +399 -0
  30. src/detectors/threat/__init__.py +0 -0
  31. src/detectors/threat/code_security_detector.py +206 -0
  32. src/detectors/threat/yara_detector.py +177 -0
  33. src/main.py +608 -0
  34. src/models/generated_detectors.py +1296 -0
  35. src/models/generated_input.py +2732 -0
  36. src/models/generated_single_asset_scan_results.py +240 -0
  37. src/outputs/__init__.py +3 -0
  38. src/outputs/base.py +69 -0
  39. src/outputs/console.py +62 -0
  40. src/outputs/factory.py +156 -0
  41. src/outputs/file.py +83 -0
  42. src/outputs/rest.py +258 -0
  43. src/pipeline/__init__.py +7 -0
  44. src/pipeline/content_provider.py +26 -0
  45. src/pipeline/detector_pipeline.py +742 -0
  46. src/pipeline/parsed_content_provider.py +59 -0
  47. src/sandbox/__init__.py +5 -0
  48. src/sandbox/runner.py +145 -0
  49. src/sources/__init__.py +95 -0
  50. src/sources/atlassian_common.py +389 -0
  51. src/sources/azure_blob_storage/__init__.py +3 -0
  52. src/sources/azure_blob_storage/source.py +130 -0
  53. src/sources/base.py +296 -0
  54. src/sources/confluence/__init__.py +3 -0
  55. src/sources/confluence/source.py +733 -0
  56. src/sources/databricks/__init__.py +3 -0
  57. src/sources/databricks/source.py +1279 -0
  58. src/sources/dependencies.py +81 -0
  59. src/sources/google_cloud_storage/__init__.py +3 -0
  60. src/sources/google_cloud_storage/source.py +114 -0
  61. src/sources/hive/__init__.py +3 -0
  62. src/sources/hive/source.py +709 -0
  63. src/sources/jira/__init__.py +3 -0
  64. src/sources/jira/source.py +605 -0
  65. src/sources/mongodb/__init__.py +3 -0
  66. src/sources/mongodb/source.py +550 -0
  67. src/sources/mssql/__init__.py +3 -0
  68. src/sources/mssql/source.py +1034 -0
  69. src/sources/mysql/__init__.py +3 -0
  70. src/sources/mysql/source.py +797 -0
  71. src/sources/neo4j/__init__.py +0 -0
  72. src/sources/neo4j/source.py +523 -0
  73. src/sources/object_storage/base.py +679 -0
  74. src/sources/oracle/__init__.py +3 -0
  75. src/sources/oracle/source.py +982 -0
  76. src/sources/postgresql/__init__.py +3 -0
  77. src/sources/postgresql/source.py +774 -0
  78. src/sources/powerbi/__init__.py +3 -0
  79. src/sources/powerbi/source.py +774 -0
  80. src/sources/recipe_normalizer.py +179 -0
  81. src/sources/s3_compatible_storage/README.md +66 -0
  82. src/sources/s3_compatible_storage/__init__.py +3 -0
  83. src/sources/s3_compatible_storage/source.py +150 -0
  84. src/sources/servicedesk/__init__.py +3 -0
  85. src/sources/servicedesk/source.py +620 -0
  86. src/sources/slack/__init__.py +3 -0
  87. src/sources/slack/source.py +534 -0
  88. src/sources/snowflake/__init__.py +3 -0
  89. src/sources/snowflake/source.py +912 -0
  90. src/sources/tableau/__init__.py +3 -0
  91. src/sources/tableau/source.py +799 -0
  92. src/sources/tabular_utils.py +165 -0
  93. src/sources/wordpress/__init__.py +3 -0
  94. src/sources/wordpress/source.py +590 -0
  95. src/telemetry.py +96 -0
  96. src/utils/__init__.py +1 -0
  97. src/utils/content_extraction.py +108 -0
  98. src/utils/file_parser.py +777 -0
  99. src/utils/hashing.py +82 -0
  100. src/utils/uv_sync.py +79 -0
  101. src/utils/validation.py +56 -0
File without changes
@@ -0,0 +1,399 @@
1
+ """Secrets detector powered by the detect-secrets library.
2
+
3
+ Operates entirely in-memory: splits text into lines and invokes each enabled
4
+ plugin's ``analyze_line`` directly. No temp files, no global Settings state,
5
+ and no ``SecretsCollection`` needed.
6
+ """
7
+
8
+ import importlib
9
+ import logging
10
+ import pkgutil
11
+ from typing import Any
12
+
13
+ from ...models.generated_detectors import DetectorConfig, SecretsDetectorConfig, Severity
14
+ from ...models.generated_single_asset_scan_results import DetectionResult, DetectorType, Location
15
+ from ..base import BaseDetector
16
+ from ..dependencies import MissingDependencyError, require_module
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Lazy plugin discovery
22
+ # ---------------------------------------------------------------------------
23
+ # detect-secrets is an optional dependency (security group). We must NOT
24
+ # touch the package at module-import time because the CLI auto-installs it
25
+ # lazily when the detector is instantiated. _discover_plugins() is therefore
26
+ # deferred until the first call to _build_plugins().
27
+ # ---------------------------------------------------------------------------
28
+
29
+ # Mutable container avoids the need for the ``global`` keyword.
30
+ _plugin_cache: dict[str, Any] = {"_loaded": False}
31
+
32
+
33
def _discover_plugins() -> dict[str, tuple[str, str]]:
    """Map each supported pattern key to its detect-secrets plugin class.

    Walks the non-package modules directly under ``detect_secrets.plugins``
    (skipping ``base``), records every class defined in each of them, and then
    resolves a fixed pattern-key table against the classes that were found.

    Returns:
        ``{pattern_key: (module_path, class_name)}`` for every pattern whose
        plugin class exists in the installed detect-secrets. Patterns whose
        class cannot be found are reported with a warning and left out.

    Raises:
        ImportError: If ``detect_secrets.plugins`` cannot be imported.
    """
    # Imported here rather than at module level: detect-secrets is an optional
    # dependency and must not be touched when this module is imported.
    import detect_secrets.plugins

    # Build {class_name -> module_path} from the installed package.
    class_to_mod: dict[str, str] = {}
    for _, mod_name, is_pkg in pkgutil.iter_modules(detect_secrets.plugins.__path__):
        if is_pkg or mod_name == "base":
            continue
        full_mod = f"detect_secrets.plugins.{mod_name}"
        try:
            mod = importlib.import_module(full_mod)
            for name in dir(mod):
                obj = getattr(mod, name)
                # Keep only classes defined in this module, not ones it imports.
                if isinstance(obj, type) and obj.__module__ == full_mod:
                    class_to_mod[name] = full_mod
        except Exception as exc:
            # One broken plugin module must not disable the others, but leave a
            # trace so a later "plugin class not found" warning can be explained.
            logger.debug("Skipping detect-secrets plugin module %s: %s", full_mod, exc)
            continue

    _pattern_to_class: dict[str, str] = {
        "artifactory": "ArtifactoryDetector",
        "aws": "AWSKeyDetector",
        "azure_storage": "AzureStorageKeyDetector",
        "basic_auth": "BasicAuthDetector",
        "cloudant": "CloudantDetector",
        "discord": "DiscordBotTokenDetector",
        "github": "GitHubTokenDetector",
        "gitlab": "GitLabTokenDetector",
        "high_entropy_base64": "Base64HighEntropyString",
        "high_entropy_hex": "HexHighEntropyString",
        "ibm_cloud_iam": "IbmCloudIamDetector",
        "ibm_cos_hmac": "IbmCosHmacDetector",
        "ip_public": "IPPublicDetector",
        "jwt": "JwtTokenDetector",
        "keyword": "KeywordDetector",
        "mailchimp": "MailchimpDetector",
        "npm": "NpmDetector",
        "openai": "OpenAIDetector",
        "private_key": "PrivateKeyDetector",
        "pypi": "PypiTokenDetector",
        "sendgrid": "SendGridDetector",
        "slack": "SlackDetector",
        "softlayer": "SoftlayerDetector",
        "square_oauth": "SquareOAuthDetector",
        "stripe": "StripeDetector",
        "telegram": "TelegramBotTokenDetector",
        "twilio": "TwilioKeyDetector",
    }

    specs: dict[str, tuple[str, str]] = {}
    for key, cls_name in _pattern_to_class.items():
        mod_path = class_to_mod.get(cls_name)
        if mod_path:
            specs[key] = (mod_path, cls_name)
        else:
            logger.warning(
                "Plugin class '%s' not found in installed detect-secrets; "
                "pattern '%s' will be skipped",
                cls_name,
                key,
            )
    return specs
95
+
96
+
97
def _get_plugin_specs() -> dict[str, tuple[str, str]]:
    """Return the plugin specs, running discovery only on the first call.

    The result is memoised in the module-level ``_plugin_cache`` together with
    the list of discovered pattern keys (stored under ``"defaults"``).
    """
    if _plugin_cache["_loaded"]:
        return _plugin_cache["specs"]

    discovered = _discover_plugins()
    _plugin_cache["specs"] = discovered
    _plugin_cache["defaults"] = list(discovered.keys())
    # Flip the flag last so a failed discovery is retried on the next call.
    _plugin_cache["_loaded"] = True
    return _plugin_cache["specs"]
104
+
105
+
106
# Severity classification by keywords in detect-secrets finding type (lowercased).
# Each keyword is matched as a substring of the lowercased finding type.
_SEVERITY_RULES: list[tuple[Severity, list[str]]] = [
    (
        Severity.critical,
        [
            "aws",
            "private key",
            "github",
            "gitlab",
            "slack",
            "stripe",
            "azure storage",
            "google oauth",
            "openai",
        ],
    ),
    (
        Severity.high,
        [
            "artifactory",
            "basic auth",
            "cloudant",
            "discord",
            "ibm",
            "json web token",
            "mailchimp",
            "npm",
            "pypi",
            "sendgrid",
            "softlayer",
            "square",
            "telegram",
            "twilio",
        ],
    ),
    # "public ip" matches the type string reported by detect-secrets'
    # IPPublicDetector ("Public IP (ipv4)"); the original "ip public" keyword
    # never occurs in that string and is kept only for backward compatibility.
    (Severity.medium, ["entropy", "keyword", "ip public", "public ip"]),
]

# Numeric rank of each severity level, lowest first.
_SEVERITY_RANK: dict[Severity, int] = {
    Severity.info: 0,
    Severity.low: 1,
    Severity.medium: 2,
    Severity.high: 3,
    Severity.critical: 4,
}

# Confidence by keywords in detect-secrets finding type (lowercased).
# Each keyword is matched as a substring of the lowercased finding type.
_CONFIDENCE_RULES: list[tuple[float, list[str]]] = [
    (
        0.95,
        [
            "aws",
            "github",
            "gitlab",
            "private key",
            "slack",
            "stripe",
            "azure storage",
            "openai",
            "pypi",
        ],
    ),
    (
        0.85,
        [
            "artifactory",
            "basic auth",
            "cloudant",
            "discord",
            "ibm",
            "mailchimp",
            "npm",
            "sendgrid",
            "softlayer",
            "square",
            "telegram",
            "twilio",
        ],
    ),
    (0.80, ["json web token"]),
    (0.75, ["entropy"]),
    # See the note on _SEVERITY_RULES: "public ip" is the keyword that actually
    # matches IPPublicDetector findings.
    (0.70, ["keyword", "ip public", "public ip"]),
]
189
+
190
+
191
class SecretsDetector(BaseDetector):
    """Secrets detector backed by the detect-secrets library.

    Each enabled plugin is imported and instantiated directly. Text is scanned
    line-by-line in memory via ``analyze_line`` -- no temp files, no global
    Settings state, and no async locking required.
    """

    detector_type = "secrets"
    detector_name = "secrets"

    # Pattern keys whose plugin takes a ``limit`` keyword, mapped to the config
    # field that supplies it.
    _ENTROPY_LIMIT_FIELDS: dict[str, str] = {
        "high_entropy_base64": "entropy_limit_base64",
        "high_entropy_hex": "entropy_limit_hex",
    }

    def __init__(self, config: DetectorConfig | None = None):
        """Store the configuration and verify that detect-secrets is importable.

        Args:
            config: Detector configuration. Anything that is not a
                ``SecretsDetectorConfig`` is replaced by a default one.

        Raises:
            MissingDependencyError: Propagated unchanged from ``require_module``.
        """
        super().__init__(config)
        self._cfg: SecretsDetectorConfig = (
            config if isinstance(config, SecretsDetectorConfig) else SecretsDetectorConfig()
        )
        # Fail fast at construction time if detect-secrets is not installed.
        try:
            require_module("detect_secrets", "secrets", ["security", "detectors"])
        except MissingDependencyError:
            raise

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _enabled_pattern_names(self) -> list[str]:
        """Return the pattern keys to activate, in configuration order.

        An unset or empty ``enabled_patterns`` selects every discovered
        pattern. Unknown names are logged and ignored; repeated names are
        collapsed so a plugin is never instantiated twice.
        """
        specs = _get_plugin_specs()

        raw = self._cfg.enabled_patterns
        if raw is None:
            return list(specs)

        # Unwrap Pydantic RootModel
        items = raw.root if hasattr(raw, "root") else raw
        if not items:
            return list(specs)

        names: list[str] = []
        for item in items:
            # item may be a str or a SecretsEnabledPattern enum member
            name = item.value if hasattr(item, "value") else str(item)
            if name not in specs:
                logger.warning("Unknown secrets pattern '%s' ignored", name)
            elif name not in names:
                # A duplicate entry would yield duplicate findings per line.
                names.append(name)
        return names

    def _build_plugins(self) -> list[Any]:
        """Import and instantiate each enabled detect-secrets plugin.

        Plugins that fail to import or to instantiate are logged with a
        warning and skipped, so the returned list may be shorter than the list
        of enabled patterns.
        """
        specs = _get_plugin_specs()
        plugins: list[Any] = []

        for name in self._enabled_pattern_names():
            mod_path, cls_name = specs[name]
            try:
                plugin_cls = getattr(importlib.import_module(mod_path), cls_name)
            except Exception as exc:
                logger.warning("Failed to import plugin '%s' from %s: %s", cls_name, mod_path, exc)
                continue

            kwargs: dict[str, Any] = {}
            limit_field = self._ENTROPY_LIMIT_FIELDS.get(name)
            if limit_field is not None:
                limit = getattr(self._cfg, limit_field)
                if limit is not None:
                    # The limit may be a bare number or a Pydantic RootModel.
                    kwargs["limit"] = float(limit.root if hasattr(limit, "root") else limit)

            try:
                plugins.append(plugin_cls(**kwargs))
            except Exception as exc:
                logger.warning("Failed to instantiate plugin '%s': %s", cls_name, exc)
                continue
            logger.debug("Initialized secrets plugin: %s", cls_name)

        return plugins

    @classmethod
    def _get_severity(cls, secret_type: str) -> Severity:
        """Return the severity of the first rule with a keyword in *secret_type*.

        Falls back to ``Severity.high`` when no rule matches.
        """
        lowered = secret_type.lower()
        for severity, keywords in _SEVERITY_RULES:
            if any(kw in lowered for kw in keywords):
                return severity
        return Severity.high

    @classmethod
    def _get_confidence(cls, secret_type: str) -> float:
        """Return the confidence of the first rule with a keyword in *secret_type*.

        Falls back to ``0.85`` when no rule matches.
        """
        lowered = secret_type.lower()
        for confidence, keywords in _CONFIDENCE_RULES:
            if any(kw in lowered for kw in keywords):
                return confidence
        return 0.85

    def _to_result(
        self,
        secret: Any,
        line_text: str,
        line_number: int,
        confidence_threshold: float,
        min_severity_rank: int,
    ) -> DetectionResult | None:
        """Convert one plugin finding into a ``DetectionResult``.

        Returns ``None`` when the finding has no type or value, cannot be
        read, or falls below the confidence or severity threshold.
        """
        try:
            secret_type = str(secret.type) if secret.type else ""
            secret_value = str(secret.secret_value) if secret.secret_value is not None else ""
            is_verified = bool(secret.is_verified)
        except Exception as exc:
            logger.debug("Unreadable finding on line %d skipped: %s", line_number, exc)
            return None

        if not secret_type or not secret_value:
            return None

        confidence = self._get_confidence(secret_type)
        if confidence < confidence_threshold:
            return None

        severity = self._get_severity(secret_type)
        if _SEVERITY_RANK.get(severity, 0) < min_severity_rank:
            return None

        # Column of the first occurrence within the line; 0 when the reported
        # value does not appear verbatim in the line text.
        start = max(line_text.find(secret_value), 0)

        return DetectionResult(
            detector_type=DetectorType.SECRETS,
            finding_type=secret_type,
            category="SECRETS",
            severity=severity,
            confidence=confidence,
            matched_content=secret_value,
            location=Location(
                start=start,
                end=start + len(secret_value),
                line=line_number,
                path=f"line {line_number}",
            ),
            metadata={
                "detector": "secrets",
                "plugin": secret_type,
                "is_verified": is_verified,
            },
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def detect(
        self, content: str | bytes, content_type: str = "text/plain"
    ) -> list[DetectionResult]:
        """Scan *content* line by line with every enabled plugin.

        Args:
            content: Text to scan. Bytes are decoded as UTF-8 with undecodable
                sequences replaced.
            content_type: Accepted for interface compatibility; not used here.

        Returns:
            Findings in scan order (line, then plugin), filtered by the
            configured confidence and severity thresholds and capped at
            ``max_findings`` when that is a positive number.
        """
        if isinstance(content, bytes):
            # errors="replace" never raises, so no fallback path is needed.
            content = content.decode("utf-8", errors="replace")

        plugins = self._build_plugins()
        if not plugins:
            return []

        # Only a missing threshold falls back to 0.7; an explicit 0.0 is honoured.
        configured_threshold = self._cfg.confidence_threshold
        confidence_threshold: float = (
            0.7 if configured_threshold is None else configured_threshold
        )
        severity_threshold = self._cfg.severity_threshold
        min_severity_rank = _SEVERITY_RANK.get(severity_threshold, 0) if severity_threshold else 0

        max_findings = self._cfg.max_findings
        limit = max_findings if max_findings and max_findings > 0 else None
        results: list[DetectionResult] = []

        for line_number, line_text in enumerate(content.splitlines(), start=1):
            for plugin in plugins:
                try:
                    found = plugin.analyze_line(
                        filename="<inline>",
                        line=line_text,
                        line_number=line_number,
                    )
                except Exception as exc:
                    logger.debug(
                        "Plugin %s failed on line %d: %s",
                        plugin.__class__.__name__,
                        line_number,
                        exc,
                    )
                    continue

                for secret in found:
                    result = self._to_result(
                        secret,
                        line_text,
                        line_number,
                        confidence_threshold,
                        min_severity_rank,
                    )
                    if result is None:
                        continue
                    results.append(result)
                    # Stop as soon as the cap is reached instead of scanning
                    # the rest of the input and truncating afterwards.
                    if limit is not None and len(results) >= limit:
                        return results

        return results

    def get_supported_content_types(self) -> list[str]:
        """Return the MIME types this detector accepts."""
        return [
            "text/plain",
            "application/json",
            "application/yaml",
            "application/x-yaml",
            "text/yaml",
            "application/xml",
            "text/xml",
        ]
File without changes
@@ -0,0 +1,206 @@
1
+ """Code security detector using Bandit static analysis."""
2
+
3
+ import json
4
+ import logging
5
+ import subprocess
6
+ import sys
7
+ import tempfile
8
+ from importlib.util import find_spec
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from ...models.generated_detectors import (
13
+ CodeSecurityDetectorConfig,
14
+ DetectorConfig,
15
+ GenericDetectorConfig,
16
+ Severity,
17
+ )
18
+ from ...models.generated_single_asset_scan_results import (
19
+ DetectionResult,
20
+ DetectorType,
21
+ )
22
+ from ..base import BaseDetector
23
+ from ..dependencies import MissingDependencyError, require_module
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
# Numeric rank of each severity level, lowest (info = 0) to highest (critical = 4).
_SEVERITY_ORDER: dict[Severity, int] = {
    level: rank
    for rank, level in enumerate(
        (
            Severity.info,
            Severity.low,
            Severity.medium,
            Severity.high,
            Severity.critical,
        )
    )
}
34
+
35
+
36
class CodeSecurityDetector(BaseDetector):
    """Detect insecure code patterns with Bandit (rule-based).

    The content is written to a temporary ``.py`` file and scanned by running
    ``python -m bandit`` in a subprocess; Bandit's JSON report is converted
    into ``DetectionResult`` objects.
    """

    detector_type = "code_security"
    detector_name = "code_security"

    # Bandit severity level (upper-cased) -> project severity.
    _SEVERITY_BY_LEVEL: dict[str, Severity] = {
        "HIGH": Severity.high,
        "MEDIUM": Severity.medium,
        "LOW": Severity.low,
    }

    # Bandit confidence level (upper-cased) -> numeric confidence.
    _CONFIDENCE_BY_LEVEL: dict[str, float] = {
        "HIGH": 0.95,
        "MEDIUM": 0.8,
        "LOW": 0.6,
    }

    def __init__(self, config: DetectorConfig | None = None):
        """Store the configuration and verify that Bandit is available.

        Args:
            config: A ``CodeSecurityDetectorConfig`` or ``GenericDetectorConfig``
                is used as given; anything else is replaced by a default
                ``CodeSecurityDetectorConfig``.

        Raises:
            MissingDependencyError: Propagated unchanged from ``require_module``.
        """
        super().__init__(config)
        self._cfg: CodeSecurityDetectorConfig | GenericDetectorConfig
        if isinstance(config, (CodeSecurityDetectorConfig, GenericDetectorConfig)):
            self._cfg = config
        else:
            self._cfg = CodeSecurityDetectorConfig()
        # Importing `bandit` eagerly can trigger stevedore plugin discovery noise.
        # We only verify Bandit availability here; execution happens in a subprocess.
        if find_spec("bandit") is None:
            try:
                require_module("bandit", "code_security", ["security", "detectors"])
            except MissingDependencyError:
                raise

    @staticmethod
    def _severity_from_bandit(level: str) -> Severity:
        """Map a Bandit severity label to ``Severity`` (unknown labels -> info)."""
        return CodeSecurityDetector._SEVERITY_BY_LEVEL.get(level.upper(), Severity.info)

    @staticmethod
    def _confidence_from_bandit(level: str) -> float:
        """Map a Bandit confidence label to a score (unknown labels -> 0.5)."""
        return CodeSecurityDetector._CONFIDENCE_BY_LEVEL.get(level.upper(), 0.5)

    def _run_bandit_json(
        self,
        content: str,
        skips: list[str] | None = None,
        tests: list[str] | None = None,
    ) -> tuple[list[dict[str, Any]], list[str]]:
        """Run Bandit on *content* and return its parsed JSON report.

        Args:
            content: Source text to scan.
            skips: Bandit test IDs to skip, if any.
            tests: Bandit test IDs to run exclusively, if any.

        Returns:
            ``(issues, errors)``: the dict entries of the report's ``results``
            list and the stringified entries of its ``errors`` list. Any
            failure (writing the file, running Bandit, parsing its output) is
            logged and reported as ``([], [message])`` rather than raised.
        """
        tmp_path: Path | None = None
        try:
            # Writing happens inside the try so that a failed write is handled
            # like any other scan failure and the file is still removed.
            with tempfile.NamedTemporaryFile(
                mode="w",
                suffix=".py",
                encoding="utf-8",
                delete=False,
            ) as handle:
                tmp_path = Path(handle.name)
                handle.write(content)

            cmd = [sys.executable, "-m", "bandit", "-q", "-f", "json"]
            if tests:
                # Documented spelling; "--test" only worked via argparse
                # prefix abbreviation.
                cmd += ["--tests", ",".join(tests)]
            if skips:
                cmd += ["--skip", ",".join(skips)]
            cmd.append(str(tmp_path))

            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                check=False,
            )

            # Exit codes 0 and 1 are both treated as a completed scan.
            if proc.returncode not in (0, 1):
                stderr = proc.stderr.strip() or "Unknown Bandit execution error"
                logger.error("Bandit execution failed: %s", stderr)
                return [], [stderr]

            payload = json.loads(proc.stdout.strip() or "{}")
            if not isinstance(payload, dict):
                return [], []

            # ``or []`` tolerates an explicit null for either key.
            results = payload.get("results") or []
            errors = payload.get("errors") or []
            return (
                [item for item in results if isinstance(item, dict)],
                [str(item) for item in errors],
            )
        except Exception as exc:
            logger.error("Code security scan failed: %s", exc)
            return [], [str(exc)]
        finally:
            if tmp_path is not None:
                tmp_path.unlink(missing_ok=True)

    async def detect(
        self, content: str | bytes, content_type: str = "text/plain"
    ) -> list[DetectionResult]:
        """Scan *content* with Bandit and return the findings that pass the filters.

        Args:
            content: Source text to scan. Bytes and blank text yield no findings.
            content_type: Accepted for interface compatibility; not used here.

        Returns:
            At most ``max_findings`` results (25 when unset), each meeting the
            configured confidence threshold (0.7 when unset) and, for a
            ``CodeSecurityDetectorConfig``, the severity threshold.
        """
        if isinstance(content, bytes):
            return []
        if not content.strip():
            return []

        # Only a missing threshold falls back to 0.7; an explicit 0.0 is
        # honoured so low-confidence Bandit issues can be surfaced on request.
        configured_threshold = self._cfg.confidence_threshold
        threshold = 0.7 if configured_threshold is None else configured_threshold
        max_findings = self._cfg.max_findings or 25
        findings: list[DetectionResult] = []

        skips: list[str] | None = None
        tests: list[str] | None = None
        severity_threshold: Severity | None = None
        if isinstance(self._cfg, CodeSecurityDetectorConfig):
            skips = self._cfg.skips
            tests = self._cfg.tests
            severity_threshold = self._cfg.severity_threshold

        issues, errors = self._run_bandit_json(content, skips=skips, tests=tests)
        if not issues:
            if errors:
                logger.debug("Bandit returned no issues with errors: %s", errors)
            return []

        min_severity_rank = _SEVERITY_ORDER.get(severity_threshold, 0) if severity_threshold else 0

        for issue in issues:
            confidence = self._confidence_from_bandit(str(issue.get("issue_confidence", "")))
            if confidence < threshold:
                continue

            severity = self._severity_from_bandit(str(issue.get("issue_severity", "")))
            if _SEVERITY_ORDER.get(severity, 0) < min_severity_rank:
                continue

            issue_text = str(issue.get("issue_text", "Potential insecure code pattern"))
            code_snippet = str(issue.get("code", "")).strip()
            finding_type = str(issue.get("test_id", issue.get("test_name", "code_security")))

            findings.append(
                DetectionResult(
                    detector_type=DetectorType.CODE_SECURITY,
                    finding_type=finding_type,
                    category="SECURITY",
                    severity=severity,
                    confidence=confidence,
                    # Prefer the offending code; fall back to Bandit's message.
                    matched_content=code_snippet or issue_text,
                    location=None,
                    metadata={
                        "tool": "bandit",
                        "issue_text": issue_text,
                        "test_name": issue.get("test_name"),
                        "test_id": issue.get("test_id"),
                        "issue_severity": issue.get("issue_severity"),
                        "issue_confidence": issue.get("issue_confidence"),
                    },
                )
            )

            if len(findings) >= max_findings:
                break

        return findings

    def get_supported_content_types(self) -> list[str]:
        """Return the MIME types this detector accepts."""
        return [
            "text/plain",
            "text/html",
            "text/markdown",
            "application/json",
            "application/octet-stream",
        ]