apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,182 @@
1
+ """
2
+ Cached file reader and AST parser for query answering.
3
+
4
+ Avoids re-reading and re-parsing the same files across multiple
5
+ verification questions in a single analysis session. Uses content hashes
6
+ to skip re-parsing when file content hasn't changed.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import ast
12
+ import hashlib
13
+ import logging
14
+ from pathlib import Path
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ class ASTCache:
20
+ """Cache parsed ASTs and source text for project files.
21
+
22
+ Uses content hashing so unchanged files are not re-parsed on repeat runs.
23
+ """
24
+
25
+ def __init__(self, project_root: Path, max_entries: int = 500):
26
+ self._root = project_root.resolve()
27
+ self._source_cache: dict[str, str] = {}
28
+ self._ast_cache: dict[str, ast.Module | None] = {}
29
+ self._content_hashes: dict[str, str] = {}
30
+ self._max = max_entries
31
+
32
+ @property
33
+ def project_root(self) -> Path:
34
+ return self._root
35
+
36
+ def get_source(self, file_path: str) -> str | None:
37
+ """Read and cache the source text of a file.
38
+
39
+ Accepts both absolute and project-relative paths.
40
+ Uses content hashing to detect changes.
41
+ """
42
+ resolved = self._resolve(file_path)
43
+ if not resolved:
44
+ return None
45
+
46
+ key = str(resolved)
47
+
48
+ try:
49
+ raw = resolved.read_bytes()
50
+ except OSError as e:
51
+ logger.debug("Cannot read %s: %s", resolved, e)
52
+ return None
53
+
54
+ content_hash = hashlib.sha256(raw).hexdigest()
55
+ cached_hash = self._content_hashes.get(key)
56
+
57
+ if cached_hash == content_hash and key in self._source_cache:
58
+ return self._source_cache[key]
59
+
60
+ # File changed or first read — invalidate AST cache entry too
61
+ self._ast_cache.pop(key, None)
62
+
63
+ text = raw.decode("utf-8", errors="replace")
64
+ self._evict_if_needed()
65
+ self._source_cache[key] = text
66
+ self._content_hashes[key] = content_hash
67
+ return text
68
+
69
+ def get_ast(self, file_path: str) -> ast.Module | None:
70
+ """Parse and cache the AST for a Python file."""
71
+ resolved = self._resolve(file_path)
72
+ if not resolved:
73
+ return None
74
+
75
+ key = str(resolved)
76
+ if key in self._ast_cache:
77
+ return self._ast_cache[key]
78
+
79
+ source = self.get_source(file_path)
80
+ if source is None:
81
+ return None
82
+
83
+ try:
84
+ tree = ast.parse(source, filename=str(resolved))
85
+ except SyntaxError as e:
86
+ logger.debug("Cannot parse %s: %s", resolved, e)
87
+ self._ast_cache[key] = None
88
+ return None
89
+
90
+ self._ast_cache[key] = tree
91
+ return tree
92
+
93
+ def get_lines(self, file_path: str, start: int, end: int) -> list[str] | None:
94
+ """Return lines [start, end] (1-indexed inclusive) from a file."""
95
+ source = self.get_source(file_path)
96
+ if source is None:
97
+ return None
98
+ lines = source.splitlines()
99
+ s = max(0, start - 1)
100
+ e = min(len(lines), end)
101
+ return lines[s:e]
102
+
103
+ def get_function_node(
104
+ self,
105
+ file_path: str,
106
+ function_name: str,
107
+ ) -> ast.FunctionDef | ast.AsyncFunctionDef | None:
108
+ """Find a function definition by name in a file's AST."""
109
+ tree = self.get_ast(file_path)
110
+ if tree is None:
111
+ return None
112
+
113
+ for node in ast.walk(tree):
114
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
115
+ if node.name == function_name:
116
+ return node
117
+ return None
118
+
119
+ def get_class_node(self, file_path: str, class_name: str) -> ast.ClassDef | None:
120
+ """Find a class definition by name in a file's AST."""
121
+ tree = self.get_ast(file_path)
122
+ if tree is None:
123
+ return None
124
+
125
+ for node in ast.walk(tree):
126
+ if isinstance(node, ast.ClassDef) and node.name == class_name:
127
+ return node
128
+ return None
129
+
130
+ def get_function_source(self, file_path: str, function_name: str) -> str | None:
131
+ """Get the raw source text of a function."""
132
+ fn = self.get_function_node(file_path, function_name)
133
+ if fn is None:
134
+ return None
135
+
136
+ source = self.get_source(file_path)
137
+ if source is None:
138
+ return None
139
+
140
+ lines = source.splitlines()
141
+ start = fn.lineno - 1
142
+ end = fn.end_lineno or fn.lineno
143
+ return "\n".join(lines[start:end])
144
+
145
+ def invalidate(self, file_path: str | None = None) -> None:
146
+ """Clear cached entries. None clears all."""
147
+ if file_path is None:
148
+ self._source_cache.clear()
149
+ self._ast_cache.clear()
150
+ self._content_hashes.clear()
151
+ return
152
+
153
+ resolved = self._resolve(file_path)
154
+ if resolved:
155
+ key = str(resolved)
156
+ self._source_cache.pop(key, None)
157
+ self._ast_cache.pop(key, None)
158
+ self._content_hashes.pop(key, None)
159
+
160
+ # ------------------------------------------------------------------
161
+ # Internal
162
+ # ------------------------------------------------------------------
163
+
164
+ def _resolve(self, file_path: str) -> Path | None:
165
+ p = Path(file_path)
166
+ resolved = p.resolve() if p.is_absolute() else (self._root / p).resolve()
167
+
168
+ if not resolved.is_relative_to(self._root):
169
+ logger.warning("Path traversal blocked: %s (resolves outside project root)", file_path)
170
+ return None
171
+
172
+ if not resolved.is_file():
173
+ logger.debug("File not found: %s", resolved)
174
+ return None
175
+ return resolved
176
+
177
+ def _evict_if_needed(self) -> None:
178
+ while len(self._source_cache) >= self._max:
179
+ oldest_key = next(iter(self._source_cache))
180
+ del self._source_cache[oldest_key]
181
+ self._ast_cache.pop(oldest_key, None)
182
+ self._content_hashes.pop(oldest_key, None)
@@ -0,0 +1,283 @@
1
+ """
2
+ Query executor: orchestrates the verification loop.
3
+
4
+ Supports two modes:
5
+ - Connected: probe uploads manifest, polls engine for questions,
6
+ answers them, and submits responses over HTTP.
7
+ - Air-gapped: reads questions from a JSON file, answers them,
8
+ and writes responses to another JSON file.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ import time
16
+ from dataclasses import dataclass, field
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ from ..cloud.client import CloudClient, QuestionItem
21
+ from .ast_cache import ASTCache
22
+ from .handlers import AnswerOutput, QuestionInput, get_handler
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
@dataclass
class ExecutionStats:
    """Counters collected while a verification run executes."""

    # Number of question rounds processed.
    rounds_completed: int = 0
    # Total answers produced across all rounds.
    questions_answered: int = 0
    # Error tally for the run.
    errors: int = 0
    # Wall-clock duration of the run, in seconds.
    elapsed_seconds: float = 0.0


@dataclass
class ExecutionResult:
    """Outcome of a verification run: identifiers, final status and stats."""

    # Identifier of the analysis this run belongs to, when one is known.
    analysis_id: str | None = None
    # Last status recorded for the run; "unknown" until something sets it.
    final_status: str = "unknown"
    # Each result owns its own counters object.
    stats: ExecutionStats = field(default_factory=ExecutionStats)
    # Failure description when the run ended with an exception.
    error: str | None = None
41
+
42
+
43
class QueryExecutor:
    """Orchestrate the verification question/answer loop.

    Connected mode uploads a manifest and exchanges question batches with a
    cloud client. Air-gapped mode reads questions from a JSON file and writes
    the answers to another JSON file. Both modes answer questions through the
    handlers returned by `get_handler`, sharing one `ASTCache`.
    """

    def __init__(
        self,
        project_root: Path,
        max_batches: int = 10,
        poll_timeout: int = 30,
        max_wait: int = 300,
    ):
        """Store loop limits and create the shared AST cache.

        Args:
            project_root: Root directory handed to `ASTCache`.
            max_batches: Maximum number of poll iterations in connected mode.
            poll_timeout: Passed to `client.poll_questions` as `poll_timeout`.
            max_wait: Passed to `client.poll_questions` as `max_wait`.
        """
        self._root = project_root.resolve()
        self._max_batches = max_batches
        self._poll_timeout = poll_timeout
        self._max_wait = max_wait
        self._cache = ASTCache(self._root)

    # ------------------------------------------------------------------
    # Connected mode
    # ------------------------------------------------------------------

    def run_connected(
        self,
        client: CloudClient,
        manifest_data: dict[str, Any],
    ) -> ExecutionResult:
        """Upload the manifest, then run the verification loop.

        Any exception is captured on the returned result (`final_status` set
        to "error", `error` set to a non-empty description), not raised.
        """
        start = time.monotonic()
        result = ExecutionResult()

        try:
            analysis_id, status = client.upload_manifest(manifest_data)
            result.analysis_id = analysis_id
            logger.info("Upload complete: analysis_id=%s status=%s", analysis_id, status)

            if status == "complete":
                result.final_status = "complete"
            else:
                self._run_loop(client, analysis_id, result)

        except Exception as e:
            result.error = self._describe_error(e)
            result.final_status = "error"
            logger.error("Connected execution failed: %s", result.error)

        result.stats.elapsed_seconds = time.monotonic() - start
        return result

    def _run_loop(
        self,
        client: CloudClient,
        analysis_id: str,
        result: ExecutionResult,
    ) -> None:
        """Poll for questions, answer them, submit, repeat.

        Updates `result.final_status` and `result.stats` in place. The loop
        ends when the engine reports a terminal status or after
        `max_batches` iterations ("max_batches_reached").
        """
        # NOTE(review): iterations that only see "verifying" also count
        # against max_batches; confirm that is the intended budget.
        for round_num in range(1, self._max_batches + 1):
            logger.info("Polling for questions (round %d)...", round_num)

            poll_result = client.poll_questions(
                analysis_id,
                poll_timeout=self._poll_timeout,
                max_wait=self._max_wait,
            )
            status = poll_result.status

            if status == "complete":
                result.final_status = "complete"
                logger.info("Analysis complete after %d rounds", round_num - 1)
                return

            if status in ("failed", "timeout"):
                result.final_status = status
                return

            if status == "verifying":
                # LLM verification running, no on-demand questions yet — keep polling
                logger.debug("LLM verifying, no questions yet — will poll again")
                time.sleep(2)
                continue

            if status != "questions_ready" or not poll_result.questions:
                logger.warning("Unexpected poll status: %s", status)
                result.final_status = status
                return

            answers = self._answer_batch(poll_result.questions)
            result.stats.rounds_completed = round_num
            result.stats.questions_answered += len(answers)
            result.stats.errors += self._count_errors(answers)

            submit_result = client.submit_answers(
                analysis_id,
                poll_result.batch_id or "",
                self._serialize_answers(answers),
            )

            if submit_result.more_questions:
                continue

            if submit_result.status == "complete":
                result.final_status = "complete"
                logger.info("Analysis complete after round %d", round_num)
                return

            if submit_result.status == "verifying":
                # LLM verification in progress — keep polling for on-demand questions
                logger.info(
                    "LLM verification in progress, continuing to poll for on-demand questions"
                )
                continue

            result.final_status = submit_result.status
            logger.info("No more questions; status=%s", submit_result.status)
            return

        result.final_status = "max_batches_reached"
        logger.warning("Max batches (%d) reached", self._max_batches)

    # ------------------------------------------------------------------
    # Air-gapped mode
    # ------------------------------------------------------------------

    def run_airgapped(
        self,
        questions_file: Path,
        output_file: Path,
    ) -> ExecutionResult:
        """Read questions from a file, answer them, write answers to a file.

        The input is either a JSON list of questions or a JSON object with a
        "questions" list and optional "batch_id" / "analysis_id". Both files
        are handled as UTF-8. Any exception is captured on the returned
        result, not raised.
        """
        start = time.monotonic()
        result = ExecutionResult()

        try:
            questions_data, batch_id, analysis_id = self._read_questions_file(questions_file)
            result.analysis_id = analysis_id

            question_items = [
                QuestionItem(
                    id=q["id"],
                    type=q["type"],
                    gate_id=q.get("gate_id", ""),
                    finding_id=q.get("finding_id", ""),
                    round_number=q.get("round_number", 1),
                    target_file=q.get("target_file"),
                    target_function=q.get("target_function"),
                    target_line=q.get("target_line"),
                    params=q.get("params", {}),
                )
                for q in questions_data
            ]

            answers = self._answer_batch(question_items)
            result.stats.questions_answered = len(answers)
            result.stats.errors = self._count_errors(answers)
            result.stats.rounds_completed = 1

            output = {
                "batch_id": batch_id,
                "analysis_id": analysis_id,
                "answers": self._serialize_answers(answers),
            }

            output_file.parent.mkdir(parents=True, exist_ok=True)
            # Explicit encoding keeps the output independent of the locale.
            output_file.write_text(
                json.dumps(output, indent=2, default=str),
                encoding="utf-8",
            )

            result.final_status = "complete"
            logger.info(
                "Air-gapped: answered %d questions -> %s",
                len(answers),
                output_file,
            )

        except Exception as e:
            result.error = self._describe_error(e)
            result.final_status = "error"
            logger.error("Air-gapped execution failed: %s", result.error)

        result.stats.elapsed_seconds = time.monotonic() - start
        return result

    @staticmethod
    def _read_questions_file(
        questions_file: Path,
    ) -> tuple[list[Any], Any, Any]:
        """Load a questions file and return (questions, batch_id, analysis_id).

        The file is decoded as UTF-8, tolerating a leading byte-order mark.

        Raises:
            ValueError: If the top-level JSON value is neither a list nor an
                object, or if "questions" is present but not a list.
        """
        raw = json.loads(questions_file.read_text(encoding="utf-8-sig"))

        if isinstance(raw, list):
            return raw, None, None

        if isinstance(raw, dict):
            questions = raw.get("questions", [])
            if not isinstance(questions, list):
                raise ValueError(
                    f"'questions' must be a list, got {type(questions).__name__}"
                )
            return questions, raw.get("batch_id"), raw.get("analysis_id")

        raise ValueError(
            f"Questions file must contain a JSON list or object, got {type(raw).__name__}"
        )

    # ------------------------------------------------------------------
    # Shared question answering
    # ------------------------------------------------------------------

    @staticmethod
    def _serialize_answers(answers: list[AnswerOutput]) -> list[dict[str, Any]]:
        """Convert answers to the plain dicts submitted or written to disk."""
        return [
            {
                "question_id": a.question_id,
                "result": a.result,
                "evidence": a.evidence,
                "confidence": a.confidence,
                "details": a.details,
            }
            for a in answers
        ]

    @staticmethod
    def _count_errors(answers: list[AnswerOutput]) -> int:
        """Count answers whose result is "error"."""
        return sum(1 for a in answers if a.result == "error")

    @staticmethod
    def _describe_error(exc: BaseException) -> str:
        """Return a non-empty description, falling back to the type name."""
        return str(exc) or type(exc).__name__

    def _answer_batch(self, questions: list[QuestionItem]) -> list[AnswerOutput]:
        """Answer each question with its registered handler.

        Exactly one answer is produced per question. A missing handler or a
        handler exception yields an answer whose result is "error".
        """
        answers: list[AnswerOutput] = []

        for q in questions:
            question_input = QuestionInput(
                id=q.id,
                type=q.type,
                gate_id=q.gate_id,
                finding_id=q.finding_id,
                target_file=q.target_file,
                target_function=q.target_function,
                target_line=q.target_line,
                params=q.params,
            )

            handler = get_handler(q.type)
            if handler is None:
                answers.append(
                    AnswerOutput(
                        question_id=q.id,
                        result="error",
                        details=f"No handler for question type: {q.type}",
                    )
                )
                continue

            try:
                answers.append(handler.handle(question_input, self._cache))
            except Exception as e:
                # One failing handler must not abort the rest of the batch.
                logger.error("Handler error for question %s: %s", q.id, e)
                answers.append(
                    AnswerOutput(
                        question_id=q.id,
                        result="error",
                        details=self._describe_error(e),
                    )
                )

        return answers