apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1009 @@
1
+ """
2
+ Manifest schema definitions for apisec-code-bolt.
3
+
4
+ The manifest is the primary output of the Probe - a structured JSON document
5
+ containing all extracted facts about the codebase. This is sent to the Cloud
6
+ for vulnerability analysis.
7
+
8
+ Key principle: The manifest contains FACTS, not security judgments.
9
+ The Cloud applies vulnerability rules to interpret these facts.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import hashlib
15
+ from datetime import datetime
16
+ from typing import Any, Literal
17
+
18
+ from pydantic import BaseModel, Field
19
+
20
+ from .types import (
21
+ CodeLocation,
22
+ )
23
+
24
+ # =============================================================================
25
+ # Manifest Version
26
+ # =============================================================================
27
+
28
# Schema version stamped into every manifest.
# History:
#   1.3 (F-6): PackageDependencyModel carries an explicit `ecosystem` field, so
#              the reasoning engine no longer has to re-infer it (fragilely)
#              from `source_file`.
MANIFEST_VERSION = "1.3"
31
+
32
+
33
+ # =============================================================================
34
+ # Content-Addressable ID Helper
35
+ # =============================================================================
36
+
37
+
38
def stable_id(prefix: str, *parts: str) -> str:
    """Build a deterministic, content-addressed entity ID.

    The identity parts are joined with ``|``, UTF-8 encoded and hashed with
    SHA-256; the first 12 hex characters (48 bits) of the digest are appended
    to ``prefix``.

    Format: ``{prefix}-{sha256(part1|part2|...)[:12]}``

    Falsy parts (``None`` or the empty string) contribute an empty segment.
    Because the separator is not escaped, parts that themselves contain ``|``
    can hash identically to a different split of the same text, e.g.
    ``("a|b",)`` and ``("a", "b")``.
    """
    # Normalize missing parts to "" so the join never sees a non-string None.
    segments = [part if part else "" for part in parts]
    payload = "|".join(segments).encode("utf-8")
    short_digest = hashlib.sha256(payload).hexdigest()[:12]
    return "{}-{}".format(prefix, short_digest)
47
+
48
+
49
+ # =============================================================================
50
+ # Code Location (Pydantic version for serialization)
51
+ # =============================================================================
52
+
53
+
54
class LocationModel(BaseModel):
    """Code location for JSON serialization."""

    # Only `file` and `line` are mandatory; the span and owning function are
    # optional refinements.
    file: str
    line: int
    column: int | None = None
    end_line: int | None = None
    end_column: int | None = None
    function: str | None = None

    @classmethod
    def from_code_location(cls, loc: CodeLocation) -> LocationModel:
        """Convert a ``CodeLocation`` into its serializable counterpart.

        ``loc.file`` is stringified; line/column span values are copied as-is.
        """
        # NOTE(review): `function` is never populated here and therefore stays
        # None — confirm whether CodeLocation exposes an equivalent attribute.
        span = {
            "file": str(loc.file),
            "line": loc.line,
            "column": loc.column,
            "end_line": loc.end_line,
            "end_column": loc.end_column,
        }
        return cls(**span)
73
+
74
+
75
+ # =============================================================================
76
+ # Project Metadata
77
+ # =============================================================================
78
+
79
+
80
class ProjectMetadata(BaseModel):
    """Metadata about the analyzed project."""

    # --- Project identity -----------------------------------------------
    root: str = Field(description="Project root directory")
    name: str | None = Field(
        default=None,
        description="Project name (from pyproject.toml, pom.xml, etc.)",
    )

    # --- Detection results ----------------------------------------------
    languages_detected: list[str] = Field(
        default_factory=list,
        description="Programming languages found",
    )
    frameworks_detected: list[str] = Field(
        default_factory=list,
        description="Frameworks detected",
    )

    # --- Run statistics ---------------------------------------------------
    files_analyzed: int = Field(
        default=0,
        description="Number of files successfully analyzed",
    )
    files_skipped: int = Field(
        default=0,
        description="Number of files skipped",
    )
    analysis_duration_ms: int = Field(
        default=0,
        description="Analysis duration in milliseconds",
    )
97
+
98
+
99
+ # =============================================================================
100
+ # Function and Class Definitions
101
+ # =============================================================================
102
+
103
+
104
class ParameterModel(BaseModel):
    """Function/method parameter."""

    name: str
    # Annotation and default are stored as strings; both are optional.
    type_annotation: str | None = None
    default_value: str | None = None
    # Variadic positional: Python *args, Java varargs, JS rest params.
    is_variadic: bool = False
    # Variadic keyword: Python **kwargs, JS destructured options.
    is_keyword_variadic: bool = False
112
+
113
+
114
class FunctionModel(BaseModel):
    """Function or method definition — language-agnostic."""

    # --- Identity ---------------------------------------------------------
    id: str = Field(description="Unique identifier")
    qualified_name: str = Field(description="Fully qualified name")
    name: str = Field(description="Simple name")

    location: LocationModel

    # --- Binding / visibility -------------------------------------------
    binding: Literal["instance", "static", "free"] = Field(
        default="free",
        description=(
            "How the function is bound: instance method, static/class method, or free (module-level)"
        ),
    )
    visibility: Literal["public", "private", "protected", "internal"] | None = Field(
        default=None,
        description=(
            "Access control level. None when the language uses convention rather than keywords (e.g. Python)"
        ),
    )
    is_async: bool = False
    owner_type: str | None = Field(
        default=None,
        description="Owning type (class, struct, impl, interface) — None for free functions",
    )

    # --- Signature --------------------------------------------------------
    parameters: list[ParameterModel] = Field(default_factory=list)
    return_type: str | None = None

    annotations: list[str] = Field(
        default_factory=list,
        description="Decorators (Python), annotations (Java/Kotlin), attributes (C#)",
    )

    docstring: str | None = None

    # --- Change detection -------------------------------------------------
    # Per its description this hash is for diffing analyses, not for identity.
    content_hash: str | None = Field(
        default=None,
        description=(
            "SHA-256 of parameter signatures + return type + annotations + normalized body source. "
            "Body is normalized (comments/blank lines stripped, whitespace collapsed) before hashing. "
            "Used for change detection across analyses, not for identity."
        ),
    )
153
+
154
+
155
class ClassModel(BaseModel):
    """Class definition."""

    # --- Identity ---------------------------------------------------------
    id: str
    qualified_name: str
    name: str

    location: LocationModel

    # --- Declaration details ----------------------------------------------
    base_classes: list[str] = Field(default_factory=list)
    annotations: list[str] = Field(
        default_factory=list,
        description="Decorators / annotations / attributes",
    )

    # Methods are referenced by ID rather than embedded.
    methods: list[str] = Field(
        default_factory=list,
        description="FunctionModel IDs of methods belonging to this class",
    )

    docstring: str | None = None
174
+
175
+
176
+ # =============================================================================
177
+ # HTTP Parameters
178
+ # =============================================================================
179
+
180
+
181
class HttpParameterModel(BaseModel):
    """HTTP request parameter."""

    name: str
    # Free-form string; the description lists the expected values.
    location: str = Field(description="path, query, header, cookie, body, form")
    type_annotation: str | None = None
    required: bool = True
    default_value: str | None = None

    # --- Framework-specific metadata --------------------------------------
    # Alternative name in request.
    alias: str | None = None
    # Validation constraints.
    constraints: dict[str, Any] = Field(default_factory=dict)
193
+
194
+
195
class BodyFieldModel(BaseModel):
    """A field within a request body model."""

    name: str
    type_annotation: str | None = None
    required: bool = True

    # `is_complex` and `nested_model` travel together: the latter names the
    # schema when the former is True.
    is_complex: bool = Field(
        default=False,
        description="True when the type is another model/schema rather than a primitive",
    )
    nested_model: str | None = Field(
        default=None,
        description="Name of the nested schema (present when is_complex=True)",
    )

    constraints: dict[str, Any] = Field(default_factory=dict)
210
+
211
+
212
class HttpBodyModel(BaseModel):
    """HTTP request body definition."""

    # application/json, multipart/form-data, etc.
    content_type: str | None = None
    # Pydantic model, Java DTO class.
    model_name: str | None = None
    # Fields of the body model; empty by default.
    model_fields: list[BodyFieldModel] = Field(default_factory=list)
    required: bool = True
219
+
220
+
221
class SchemaFieldModel(BaseModel):
    """A field within a schema definition."""

    name: str
    type_annotation: str | None = None
    required: bool = True
    default_value: str | None = None

    # Reference to another schema, when the field is not a primitive.
    is_complex: bool = False
    nested_model: str | None = None

    alias: str | None = None
    constraints: dict[str, Any] = Field(default_factory=dict)
232
+
233
+
234
class SchemaModel(BaseModel):
    """A data model / schema referenced by route bodies or other schemas."""

    name: str
    qualified_name: str | None = None

    fields: list[SchemaFieldModel] = Field(default_factory=list)
    base_classes: list[str] = Field(default_factory=list)
    is_enum: bool = False

    # A schema may be tagged with several usages at once.
    usage: list[str] = Field(
        default_factory=list,
        description=(
            "How this schema is used: 'request_body', 'response', 'orm', 'domain', 'config'"
        ),
    )
246
+
247
+
248
+ # =============================================================================
249
+ # Entry Points (Routes)
250
+ # =============================================================================
251
+
252
+
253
class RouteModel(BaseModel):
    """Entry point definition (HTTP route, CLI command, task, message consumer, etc.)."""

    id: str = Field(description="Unique identifier (ep-001, ep-002, etc.)")

    # --- What kind of entry point this is, and how it is addressed --------
    kind: str = Field(
        default="http",
        description=(
            "Transport category for this entry point. Values are language-agnostic: "
            "http, cli, task, consumer, webhook, event, lifecycle, scheduled, grpc, websocket. "
            "'lifecycle' covers FastAPI/Starlette @on_event('startup'|'shutdown') and similar "
            "application-lifetime hooks that cannot receive external requests."
        ),
    )
    method: str = Field(
        default="",
        description=(
            "HTTP method (GET, POST, …) for http kind; empty or trigger type for non-http kinds"
        ),
    )
    path: str = Field(
        default="",
        description=(
            "URL path pattern (http), command path (cli), task name (task), queue/topic (consumer). "
            "GraphQL operations use the internal convention /graphql:<OperationType>.<fieldName> "
            "(e.g. /graphql:Query.users, /graphql:Mutation.createUser). This is NOT a real HTTP path — "
            "downstream consumers (reasoning engine, DAST) must translate it to a GraphQL request: "
            'POST /graphql with body {"query": "{ users { ... } }"}. '
            "Query → GET semantics, Mutation/Subscription → POST semantics."
        ),
    )

    # --- Request inputs, grouped by where they arrive ---------------------
    path_params: list[HttpParameterModel] = Field(default_factory=list)
    query_params: list[HttpParameterModel] = Field(default_factory=list)
    header_params: list[HttpParameterModel] = Field(default_factory=list)
    cookie_params: list[HttpParameterModel] = Field(default_factory=list)
    body: HttpBodyModel | None = None

    # --- Handler ----------------------------------------------------------
    handler_function: str = Field(description="Qualified name of handler function")
    handler_location: LocationModel

    # --- Framework metadata -----------------------------------------------
    framework: str
    router_name: str | None = None
    tags: list[str] = Field(default_factory=list)
    operation_id: str | None = None
    summary: str | None = None
    deprecated: bool = False

    # --- Dependencies (FastAPI Depends, Spring @Autowired, etc.) ----------
    dependencies: list[str] = Field(
        default_factory=list,
        description="IDs of dependency definitions used by this route",
    )

    # --- Response ---------------------------------------------------------
    response_model: str | None = None
    response_status_code: int = 200

    # --- Auth reference (populated after auth analysis) -------------------
    auth_config_ref: str | None = None
314
+
315
+
316
+ # =============================================================================
317
+ # Function Calls
318
+ # =============================================================================
319
+
320
+
321
class ArgumentModel(BaseModel):
    """Argument passed to a function call."""

    # Both position and name are optional.
    position: int | None = None
    name: str | None = None

    # `value_type` is the discriminator; the three optional fields below
    # presumably hold the payload for the matching kind — verify with producer.
    value_type: str = Field(
        description="Type of value: literal, variable, expression, call_result",
    )
    literal_value: str | None = None
    variable_name: str | None = None
    expression: str | None = None

    # Data flow reference (if this argument is tracked).
    origin_flow_refs: list[str] = Field(
        default_factory=list,
        description="IDs of data flows that reach this argument",
    )
337
+
338
+
339
class CallContextModel(BaseModel):
    """Structural context about where a call occurs."""

    # Three independent flags, all False unless set by the producer.
    in_try_block: bool = False
    in_conditional: bool = False
    in_loop: bool = False
345
+
346
+
347
class FunctionCallModel(BaseModel):
    """A function call site."""

    id: str = Field(description="Unique identifier")

    # --- Caller / callee --------------------------------------------------
    caller: str = Field(description="Qualified name of calling function")
    callee: str = Field(
        description="Qualified name of called function (or best guess)",
    )
    callee_resolved: bool = Field(
        default=True,
        description="Whether we could resolve the exact callee",
    )

    location: LocationModel

    arguments: list[ArgumentModel] = Field(default_factory=list)

    # A fresh CallContextModel (all flags False) is created per instance.
    context: CallContextModel = Field(default_factory=CallContextModel)

    # If callee couldn't be resolved, possible candidates.
    possible_callees: list[str] = Field(default_factory=list)

    sequence_index: int | None = Field(
        default=None,
        description=(
            "Monotonic index of this call within the containing function (0-based)"
        ),
    )
372
+
373
+
374
+ # =============================================================================
375
+ # Data Flows
376
+ # =============================================================================
377
+
378
+
379
class DataOriginModel(BaseModel):
    """Where data originates."""

    type: str = Field(description="Origin type (HTTP_PATH_PARAM, etc.)")
    name: str | None = Field(
        default=None,
        description="Parameter/variable name",
    )

    location: LocationModel

    # Reference to entry point if this is HTTP data.
    entry_point_ref: str | None = None

    # Additional context (free-form key/value pairs).
    metadata: dict[str, Any] = Field(default_factory=dict)
392
+
393
+
394
class DataSinkModel(BaseModel):
    """Where data ends up."""

    function: str = Field(
        description="Qualified name of function receiving data",
    )
    # Which argument receives the data, by index and/or by name.
    argument_index: int | None = None
    argument_name: str | None = None

    location: LocationModel

    # Reference to call site.
    call_ref: str | None = None
405
+
406
+
407
class FlowStepModel(BaseModel):
    """A step in a data flow path."""

    # All three are required: the step's depth and the call edge it describes.
    depth: int
    caller: str
    callee: str

    argument_mapping: dict[str, str] = Field(
        default_factory=dict,
        description="How arguments map: {'input_param': 'callee_param'}",
    )

    location: LocationModel
420
+
421
+
422
class TransformationModel(BaseModel):
    """A transformation applied to data along a flow path."""

    depth: int
    location: LocationModel
    type: str = Field(
        description=(
            "Transformation type: function_call, method_call, string_format, type_constructor, etc."
        ),
    )
    description: str | None = None
    function: str | None = Field(
        default=None,
        description=(
            "Qualified name of the function/method that performed the transformation"
        ),
    )
    # CallSiteEvidenceModel is declared later in this module; the annotation is
    # a forward reference, which works because annotations are lazy strings
    # (`from __future__ import annotations`).
    call_evidence: CallSiteEvidenceModel | None = Field(
        default=None,
        description=(
            "Structural evidence about the transformation call site (co-arguments, keyword args)"
        ),
    )
439
+
440
+
441
+ # =============================================================================
442
+ # Call-Site Evidence (shared across transformations and sinks)
443
+ # =============================================================================
444
+
445
+
446
class ArgumentConstruction(BaseModel):
    """How a specific argument value is constructed."""

    # Required; a free-form string whose expected values are listed in the
    # description.
    method: str = Field(
        description=(
            "Construction method (language-neutral): direct, string_interpolation, "
            "string_concatenation, format_call, call_wrapped, spread, collection"
        ),
    )
    container_type: str | None = Field(
        default=None,
        description=(
            "If the argument is a container literal: list, tuple, dict, set, array, map"
        ),
    )
459
+
460
+
461
class ArgumentEvidenceModel(BaseModel):
    """Structural evidence about a single argument at a call site."""

    # --- Which argument ---------------------------------------------------
    position: int | None = Field(
        default=None,
        description="Positional index (None for keyword-only)",
    )
    name: str | None = Field(default=None, description="Keyword argument name")

    # --- Literal arguments ------------------------------------------------
    is_literal: bool = False
    literal_value: str | None = Field(
        default=None,
        description="String repr of literal value (truncated if long)",
    )
    literal_type: str | None = Field(
        default=None,
        description="Type of literal: str, int, float, bool, None, list, dict, tuple",
    )

    # --- Plain variable arguments -----------------------------------------
    is_variable: bool = False
    variable_name: str | None = None

    # --- Arguments that are the result of another call --------------------
    is_call_result: bool = False
    called_function: str | None = None

    # --- How the value was put together -----------------------------------
    construction: str = Field(
        default="direct",
        description=(
            "How the argument value is constructed (language-neutral): "
            "direct, string_interpolation, string_concatenation, "
            "format_call, call_wrapped, spread, collection"
        ),
    )
    container_type: str | None = Field(
        default=None,
        description=(
            "If the argument is a container literal: list, tuple, dict, set, array, map"
        ),
    )
    source_variables: list[str] = Field(
        default_factory=list,
        description="All variable names referenced in this argument expression",
    )
    expression_text: str | None = Field(
        default=None,
        description=(
            "Source text of non-literal, non-variable expressions (truncated at 500 chars)"
        ),
    )

    # --- Taint ------------------------------------------------------------
    is_tainted: bool = Field(
        default=False,
        description=(
            "Whether this argument carries tainted data (set during data flow analysis)"
        ),
    )
511
+
512
+
513
class StringPatternModel(BaseModel):
    """A recognised string pattern in a literal argument."""

    # `type` and `pattern` are required; the argument position is optional.
    type: str = Field(
        description=(
            "Pattern type: sql_placeholder, sql_keyword, url_scheme, template_syntax, shell_metachar, html_tag, path_traversal"
        ),
    )
    pattern: str = Field(description="The matched substring or pattern")
    argument_position: int | None = Field(
        default=None,
        description="Which argument the pattern was found in",
    )
523
+
524
+
525
class CallSiteEvidenceModel(BaseModel):
    """
    Structural evidence about a function call site.

    Reusable across:
    - Sink evidence (how tainted data arrives at the destination)
    - Transformation evidence (how a transformation call was configured)
    """

    # --- The tainted argument, if any -------------------------------------
    tainted_argument_position: int | None = Field(
        default=None,
        description="Position of the argument carrying tainted data",
    )
    tainted_argument_name: str | None = Field(
        default=None,
        description="Keyword name of the argument carrying tainted data",
    )
    tainted_argument_construction: str | None = Field(
        default=None,
        description=(
            "How the tainted argument is constructed (language-neutral): direct, "
            "string_interpolation, string_concatenation, format_call, call_wrapped, spread"
        ),
    )

    # --- The call site as a whole -----------------------------------------
    all_arguments: list[ArgumentEvidenceModel] = Field(
        default_factory=list,
        description="Evidence for every argument at the call site",
    )
    string_patterns: list[StringPatternModel] = Field(
        default_factory=list,
        description=(
            "String patterns detected in literal arguments (SQL placeholders, URL schemes, etc.)"
        ),
    )
559
+
560
+
561
class DataFlowModel(BaseModel):
    """
    A tracked data flow from origin to sink.

    This is the core unit of taint tracking. Each flow represents
    data moving from an entry point (origin) to a function call (sink).
    """

    id: str = Field(description="Unique identifier (flow-001, etc.)")

    # --- Endpoints of the flow --------------------------------------------
    origin: DataOriginModel
    sink: DataSinkModel

    # --- The route taken between them -------------------------------------
    path: list[FlowStepModel] = Field(
        default_factory=list,
        description="Function call chain from origin to sink",
    )
    depth: int = Field(description="Number of function boundaries crossed")
    truncated: bool = Field(
        default=False,
        description="True if flow was cut off at max depth",
    )
    transformations: list[TransformationModel] = Field(
        default_factory=list,
        description="Transformations applied to data along the path",
    )

    # --- Evidence and caveats ---------------------------------------------
    sink_evidence: CallSiteEvidenceModel | None = Field(
        default=None,
        description=(
            "Structural evidence about how data is used at the sink call site"
        ),
    )
    unresolved_calls: list[str] = Field(
        default_factory=list,
        description="Calls in path where callee couldn't be resolved",
    )
    context: CallContextModel = Field(
        default_factory=CallContextModel,
        description="Execution context at the sink",
    )
    # Plain string rather than an enum; defaults to "HIGH".
    confidence: str = Field(
        default="HIGH",
        description="Confidence in this flow: HIGH, MEDIUM, LOW",
    )
609
+
610
+
611
+ # =============================================================================
612
+ # Authentication
613
+ # =============================================================================
614
+
615
+
616
class AuthSchemeModel(BaseModel):
    """An authentication scheme definition."""

    id: str
    type: str = Field(
        description="Auth scheme type (OAUTH2_PASSWORD, API_KEY_HEADER, etc.)",
    )
    framework: str

    location: LocationModel

    # Free-form dict; its keys depend on the scheme.
    config: dict[str, Any] = Field(
        default_factory=dict,
        description="Scheme-specific configuration",
    )
629
+
630
+
631
class AuthDependencyModel(BaseModel):
    """An authentication dependency/guard."""

    id: str
    name: str
    type: str = Field(description="FUNCTION, CLASS, DECORATOR, ANNOTATION, etc.")

    location: LocationModel

    # What scheme(s) does this use?
    uses_schemes: list[str] = Field(
        default_factory=list,
        description="Auth scheme IDs",
    )

    # What other dependencies does this depend on?
    depends_on: list[str] = Field(
        default_factory=list,
        description="Other dependency IDs",
    )

    # What does this extract/validate?
    extracts: list[str] = Field(
        default_factory=list,
        description="Fields extracted",
    )
    validates: list[str] = Field(
        default_factory=list,
        description="Validations performed",
    )

    # Role/permission requirements.
    requires_roles: list[str] = Field(default_factory=list)
    requires_scopes: list[str] = Field(default_factory=list)
    requires_permissions: list[str] = Field(default_factory=list)

    # JWT-specific.
    jwt_operations: list[str] = Field(
        default_factory=list,
        description="decode, validate_exp, etc.",
    )
659
+
660
+
661
class RouteAuthModel(BaseModel):
    """Authentication configuration for a route."""

    # Both are required: which route, and whether it needs authentication.
    route_id: str
    auth_required: bool

    mechanisms: list[str] = Field(
        default_factory=list,
        description="Auth dependency IDs protecting this route",
    )

    # Authorization requirements, each empty by default.
    scopes_required: list[str] = Field(default_factory=list)
    roles_required: list[str] = Field(default_factory=list)
    permissions_required: list[str] = Field(default_factory=list)

    notes: list[str] = Field(default_factory=list)
677
+
678
+
679
class JwtConfigModel(BaseModel):
    """JWT configuration detected in the codebase."""

    # Defaults describe "nothing detected": False, no library, empty lists.
    detected: bool = False
    library: str | None = None

    locations: list[LocationModel] = Field(default_factory=list)

    algorithms: list[str] = Field(default_factory=list)

    validations: dict[str, bool] = Field(
        default_factory=dict,
        description="What is validated: signature, expiry, issuer, audience",
    )

    # environment_variable, config_file, hardcoded
    secret_source: str | None = None
    secret_name: str | None = None
696
+
697
+
698
class MultiAuthFlowModel(BaseModel):
    """Multi-authentication flow (OR/AND combinations)."""

    id: str
    # `routes`, `mechanisms` and `logic` have no defaults and must be supplied.
    routes: list[str] = Field(description="Route IDs this applies to")
    mechanisms: list[str] = Field(description="Auth scheme/dependency IDs")
    logic: str = Field(description="OR, AND")
    description: str | None = None
706
+
707
+
708
class SecurityConfigModel(BaseModel):
    """Security configuration detected."""

    # Each section is a free-form dict, or None by default.
    cors: dict[str, Any] | None = None
    csrf: dict[str, Any] | None = None
    rate_limiting: dict[str, Any] | None = None
714
+
715
+
716
class AuthModel(BaseModel):
    """Complete authentication analysis."""

    # --- Per-entity findings ----------------------------------------------
    schemes_detected: list[AuthSchemeModel] = Field(default_factory=list)
    auth_dependencies: list[AuthDependencyModel] = Field(default_factory=list)
    route_auth_mapping: list[RouteAuthModel] = Field(default_factory=list)

    # --- Application-wide auth --------------------------------------------
    global_middleware_auth: bool = Field(
        default=False,
        description=(
            "True when a globally-applied middleware (e.g. add_middleware(AuthMiddleware)) "
            "performs authentication for every route. Lets the engine's missing_auth "
            "rule avoid false positives on middleware-protected apps."
        ),
    )

    # --- Supporting detail; every field has an empty default --------------
    jwt_config: JwtConfigModel = Field(default_factory=JwtConfigModel)
    multi_auth_flows: list[MultiAuthFlowModel] = Field(default_factory=list)
    role_definitions: list[dict[str, Any]] = Field(default_factory=list)
    security_config: SecurityConfigModel = Field(default_factory=SecurityConfigModel)
739
+
740
+
741
+ # =============================================================================
742
+ # Dependencies and Imports
743
+ # =============================================================================
744
+
745
+
746
class PackageDependencyModel(BaseModel):
    """External package dependency."""

    name: str

    # Version details are optional and default to None.
    version: str | None = None
    version_constraint: str | None = None
    ecosystem: str | None = Field(
        description="OSV ecosystem: PyPI, npm, Go, crates.io, NuGet, Maven",
        default=None,
    )

    is_dev: bool = False

    # Required: no default is supplied for the originating file.
    source_file: str = Field(
        description="requirements.txt, pyproject.toml, pom.xml, etc.",
    )
758
+
759
+
760
class ImportModel(BaseModel):
    """Import statement."""

    module: str
    names: list[str] = Field(
        description="Imported names",
        default_factory=list,
    )
    alias: str | None = None
    is_relative: bool = False

    # Required: where the import statement appears.
    location: LocationModel
769
+
770
+
771
class DependenciesModel(BaseModel):
    """Dependencies analysis."""

    # Both collections start empty, so DependenciesModel() is valid.
    packages: list[PackageDependencyModel] = Field(
        default_factory=list,
    )
    internal_imports: list[ImportModel] = Field(
        default_factory=list,
    )
776
+
777
+
778
+ # =============================================================================
779
+ # Middleware and Interceptors
780
+ # =============================================================================
781
+
782
+
783
class MiddlewareModel(BaseModel):
    """Middleware/interceptor definition."""

    # Required identity fields.
    id: str
    name: str
    type: str = Field(description="middleware, filter, interceptor")

    location: LocationModel

    # Optional ordering value; None when no order is recorded.
    order: int | None = None

    applies_to: list[str] = Field(default_factory=list, description="Route patterns or 'all'")

    # What the middleware does (detected patterns)
    operations: list[str] = Field(default_factory=list, description="auth, logging, cors, etc.")
804
+
805
+
806
+ # =============================================================================
807
+ # Integrations and Literals
808
+ # =============================================================================
809
+
810
+
811
class IntegrationTargetModel(BaseModel):
    """
    A concrete outbound call target extracted from an HTTP client call site.

    Captures the destination URL/hostname that the application communicates
    with at runtime. Used downstream by the reasoning engine to:

    - Build inter-service dependency graphs (cross-manifest correlation).
    - Detect SSRF risk (user-controlled URL components).
    - Scope DAST testing to known outbound surfaces.
    """

    # base_url is the only required field; the rest fall back to defaults that
    # mean "unknown" (empty string / None) or "literal URL" (True).
    base_url: str = Field(
        description="Scheme + host (+ port if non-standard), e.g. 'https://api.stripe.com'.",
    )
    path_pattern: str = Field(
        description="Path portion, e.g. '/v1/charges'. Empty when only the host is known.",
        default="",
    )
    http_method: str = Field(
        description="HTTP verb inferred from the call (GET, POST, …). Empty when unknown.",
        default="",
    )
    is_literal: bool = Field(
        description="True when the URL was a string literal; False for variable / f-string.",
        default=True,
    )
    called_from: LocationModel | None = Field(
        description="File, line, and function where this outbound call was made.",
        default=None,
    )
842
+
843
+
844
class IntegrationModel(BaseModel):
    """External integration detected."""

    # Required identity fields.
    id: str
    type: str = Field(
        description="database, http_client, message_queue, cloud_service, etc.",
    )
    name: str = Field(
        description="PostgreSQL, Redis, AWS S3, Stripe, etc.",
    )

    locations: list[LocationModel] = Field(default_factory=list)

    # detection_method is required; confidence is a plain string defaulting to "HIGH".
    detection_method: str = Field(
        description="import, connection_string, sdk_usage",
    )
    confidence: str = "HIGH"

    sensitivity_labels: list[str] = Field(
        description="Sensitivity classifications: PII, FINANCIAL, AUTH_CREDENTIAL, INTERNAL, GENERAL",
        default_factory=list,
    )

    targets: list[IntegrationTargetModel] = Field(
        description="Concrete outbound call targets extracted from HTTP client call sites.",
        default_factory=list,
    )

    # Free-form extra data; the keys are not constrained by this model.
    metadata: dict[str, Any] = Field(default_factory=dict)
867
+
868
+
869
class LiteralPatternModel(BaseModel):
    """Interesting literal pattern found."""

    type: str = Field(
        description="url, sql_pattern, secret_pattern, etc.",
    )
    value: str | None = Field(
        description="The literal value (sanitized if sensitive)",
        default=None,
    )
    pattern: str | None = Field(
        description="Pattern description",
        default=None,
    )

    # Required: where the literal was found.
    location: LocationModel

    # confidence is a plain string defaulting to "HIGH"; notes are optional.
    confidence: str = "HIGH"
    notes: str | None = None
882
+
883
+
884
class LiteralsModel(BaseModel):
    """Literal patterns found."""

    # One bucket per literal category; each starts as an empty list.
    urls: list[LiteralPatternModel] = Field(
        default_factory=list,
    )
    sql_patterns: list[LiteralPatternModel] = Field(
        default_factory=list,
    )
    secret_patterns: list[LiteralPatternModel] = Field(
        default_factory=list,
    )
890
+
891
+
892
+ # =============================================================================
893
+ # Capability Tagging
894
+ # =============================================================================
895
+
896
+
897
class CapabilityModel(BaseModel):
    """Inferred business capability."""

    # tag is the only required field.
    tag: str = Field(
        description="PAYMENT, USER_AUTH, FILE_STORAGE, DATA_PERSISTENCE, etc.",
    )
    evidence: list[str] = Field(
        description="Integration/route IDs that support this inference",
        default_factory=list,
    )

    # Unlike the integration and literal models in this file, the default here
    # is "MEDIUM" rather than "HIGH".
    confidence: str = "MEDIUM"
906
+
907
+
908
+ # =============================================================================
909
+ # Configuration Detection
910
+ # =============================================================================
911
+
912
+
913
class EnvVarUsageModel(BaseModel):
    """Environment variable usage."""

    # Variable name is required; usage sites default to an empty list.
    name: str
    locations: list[LocationModel] = Field(
        default_factory=list,
    )

    # None when no default value was recorded for the variable.
    default_value: str | None = None
919
+
920
+
921
class ConfigFileModel(BaseModel):
    """Configuration file detected."""

    # Both fields are required.
    path: str
    type: str = Field(
        description="yaml, json, properties, env, etc.",
    )
926
+
927
+
928
class ConfigurationModel(BaseModel):
    """Configuration analysis."""

    # Both collections start empty, so ConfigurationModel() is valid.
    env_vars_used: list[EnvVarUsageModel] = Field(
        default_factory=list,
    )
    config_files: list[ConfigFileModel] = Field(
        default_factory=list,
    )
933
+
934
+
935
+ # =============================================================================
936
+ # Analysis Metadata
937
+ # =============================================================================
938
+
939
+
940
class AnalysisMetadataModel(BaseModel):
    """Metadata about the analysis process."""

    # Data-flow settings recorded with the run (defaults shown).
    data_flow_mode: str = "inter_procedural"
    data_flow_depth: int = 10

    # Counters start at zero.
    truncated_flows: int = 0
    unresolved_calls: int = 0

    # Parse errors are kept as untyped dicts; no schema is imposed here.
    parse_errors: list[dict[str, Any]] = Field(
        default_factory=list,
    )

    warnings: list[str] = Field(
        default_factory=list,
    )
952
+
953
+
954
+ # =============================================================================
955
+ # Complete Manifest
956
+ # =============================================================================
957
+
958
+
959
def _utc_now_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Drop-in replacement for ``datetime.utcnow()``, which is deprecated since
    Python 3.12. The tzinfo is stripped so the value (and therefore its
    serialized form) stays identical to what ``utcnow()`` produced.
    """
    # Local import: only the ``datetime`` class is known to be in scope here.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class Manifest(BaseModel):
    """
    Complete manifest output from the Probe.

    This is the primary output - a comprehensive representation of
    all extracted facts about the codebase.
    """

    manifest_version: str = MANIFEST_VERSION
    # Naive UTC timestamp taken when the instance is created.
    generated_at: datetime = Field(default_factory=_utc_now_naive)
    probe_version: str = Field(description="Version of the probe that generated this")

    # Required: project-level metadata has no default.
    project: ProjectMetadata

    entry_points: list[RouteModel] = Field(default_factory=list)

    functions: list[FunctionModel] = Field(default_factory=list)
    classes: list[ClassModel] = Field(default_factory=list)

    calls: list[FunctionCallModel] = Field(default_factory=list)

    data_flows: list[DataFlowModel] = Field(default_factory=list)

    # Nested section models are built through their no-argument constructors.
    auth: AuthModel = Field(default_factory=AuthModel)

    dependencies: DependenciesModel = Field(default_factory=DependenciesModel)

    middleware: list[MiddlewareModel] = Field(default_factory=list)

    integrations: list[IntegrationModel] = Field(default_factory=list)

    capabilities: list[CapabilityModel] = Field(default_factory=list)

    schemas: dict[str, SchemaModel] = Field(
        default_factory=dict,
        description="Data model definitions referenced by route bodies, keyed by model name",
    )

    literals: LiteralsModel = Field(default_factory=LiteralsModel)

    configuration: ConfigurationModel = Field(default_factory=ConfigurationModel)

    analysis_metadata: AnalysisMetadataModel = Field(default_factory=AnalysisMetadataModel)

    def to_json(self, pretty: bool = False) -> str:
        """Serialize manifest to JSON.

        Args:
            pretty: When True, indent the output by two spaces; otherwise
                emit compact JSON with no indentation.

        Returns:
            The manifest as a JSON string.
        """
        return self.model_dump_json(indent=2 if pretty else None)

    def to_dict(self) -> dict[str, Any]:
        """Serialize manifest to dictionary.

        Returns:
            The manifest dumped in pydantic's ``"json"`` mode, so values are
            converted to JSON-compatible types.
        """
        return self.model_dump(mode="json")