apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,1399 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integration detection and fingerprinting.
|
|
3
|
+
|
|
4
|
+
Detects external integrations (databases, caches, message queues, HTTP clients,
|
|
5
|
+
cloud services, etc.) used by the codebase through three complementary signals:
|
|
6
|
+
|
|
7
|
+
1. **Import analysis** – which libraries are imported.
|
|
8
|
+
2. **Usage-location tracking** – which files/functions actually call the library.
|
|
9
|
+
3. **SDK usage-pattern extraction** – service-specific info from SDK init calls
|
|
10
|
+
(e.g. ``boto3.client("s3")`` → AWS S3).
|
|
11
|
+
|
|
12
|
+
All detection is data-driven via catalogs, making it trivial to add new
|
|
13
|
+
libraries or languages without changing logic.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from typing import TYPE_CHECKING, Any
|
|
20
|
+
|
|
21
|
+
from ..core.manifest import stable_id
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from ..core.manifest import IntegrationModel
|
|
25
|
+
from ..parsing.base import ParsedFile
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ============================================================================
|
|
29
|
+
# Integration catalog (data-driven)
|
|
30
|
+
# ============================================================================
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class LibraryEntry:
    """Catalog record linking one importable library to the integration it signals.

    Instances are immutable once constructed (``frozen=True``).
    """

    # Pattern that is matched against import module names.
    import_pattern: str
    # Integration category: database, cache, http_client, cloud_service, etc.
    integration_type: str
    # Human-readable display name (PostgreSQL, Redis, AWS S3, etc.).
    integration_name: str
    # Call-site prefixes used to track usage locations; empty tuple when omitted.
    call_prefixes: tuple[str, ...] = field(default=())
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
_LIBRARY_CATALOG: list[LibraryEntry] = [
|
|
44
|
+
# ── Databases ──────────────────────────────────────────────────────
|
|
45
|
+
LibraryEntry(
|
|
46
|
+
"sqlalchemy", "database", "SQLAlchemy", ("engine.", "session.", "create_engine", "text(")
|
|
47
|
+
),
|
|
48
|
+
LibraryEntry("psycopg", "database", "PostgreSQL (psycopg)", ("cursor.", "conn.", "psycopg")),
|
|
49
|
+
LibraryEntry(
|
|
50
|
+
"psycopg2",
|
|
51
|
+
"database",
|
|
52
|
+
"PostgreSQL (psycopg2)",
|
|
53
|
+
(
|
|
54
|
+
"cursor.",
|
|
55
|
+
"conn.",
|
|
56
|
+
),
|
|
57
|
+
),
|
|
58
|
+
LibraryEntry("asyncpg", "database", "PostgreSQL (asyncpg)", ("asyncpg.",)),
|
|
59
|
+
LibraryEntry("pymysql", "database", "MySQL (PyMySQL)", ("cursor.", "pymysql.")),
|
|
60
|
+
LibraryEntry("mysqlclient", "database", "MySQL (mysqlclient)", ("cursor.",)),
|
|
61
|
+
LibraryEntry(
|
|
62
|
+
"mysql.connector", "database", "MySQL (Connector)", ("cursor.", "mysql.connector.")
|
|
63
|
+
),
|
|
64
|
+
LibraryEntry("pymongo", "database", "MongoDB", ("pymongo.", "collection.", "MongoClient")),
|
|
65
|
+
LibraryEntry("motor", "database", "MongoDB (Motor)", ("motor.",)),
|
|
66
|
+
LibraryEntry("sqlite3", "database", "SQLite", ("cursor.", "sqlite3.")),
|
|
67
|
+
LibraryEntry(
|
|
68
|
+
"peewee",
|
|
69
|
+
"database",
|
|
70
|
+
"Peewee ORM",
|
|
71
|
+
("peewee.", ".select(", ".insert(", ".update(", ".delete("),
|
|
72
|
+
),
|
|
73
|
+
LibraryEntry("tortoise", "database", "Tortoise ORM", ("tortoise.",)),
|
|
74
|
+
LibraryEntry(
|
|
75
|
+
"sqlmodel",
|
|
76
|
+
"database",
|
|
77
|
+
"SQLModel",
|
|
78
|
+
(
|
|
79
|
+
"sqlmodel.",
|
|
80
|
+
"SQLModel",
|
|
81
|
+
),
|
|
82
|
+
),
|
|
83
|
+
LibraryEntry("databases", "database", "encode/databases", ("databases.",)),
|
|
84
|
+
LibraryEntry("aiosqlite", "database", "SQLite (async)", ("aiosqlite.",)),
|
|
85
|
+
# ── Caches ─────────────────────────────────────────────────────────
|
|
86
|
+
LibraryEntry("redis", "cache", "Redis", ("redis.", "Redis(", ".set(", ".get(", ".hset(")),
|
|
87
|
+
LibraryEntry("aioredis", "cache", "Redis (aioredis)", ("aioredis.",)),
|
|
88
|
+
LibraryEntry("memcache", "cache", "Memcached", ("memcache.",)),
|
|
89
|
+
LibraryEntry("pymemcache", "cache", "Memcached (pymemcache)", ("pymemcache.",)),
|
|
90
|
+
LibraryEntry("cachetools", "cache", "cachetools", ("cachetools.",)),
|
|
91
|
+
# ── Message queues ─────────────────────────────────────────────────
|
|
92
|
+
LibraryEntry(
|
|
93
|
+
"celery", "message_queue", "Celery", ("celery.", "Celery(", ".delay(", ".apply_async(")
|
|
94
|
+
),
|
|
95
|
+
LibraryEntry(
|
|
96
|
+
"pika", "message_queue", "RabbitMQ (pika)", ("pika.", "BlockingConnection", "channel.")
|
|
97
|
+
),
|
|
98
|
+
LibraryEntry("aio_pika", "message_queue", "RabbitMQ (aio-pika)", ("aio_pika.",)),
|
|
99
|
+
LibraryEntry(
|
|
100
|
+
"confluent_kafka",
|
|
101
|
+
"message_queue",
|
|
102
|
+
"Kafka (confluent)",
|
|
103
|
+
("confluent_kafka.", "Producer(", "Consumer("),
|
|
104
|
+
),
|
|
105
|
+
LibraryEntry("aiokafka", "message_queue", "Kafka (aiokafka)", ("aiokafka.",)),
|
|
106
|
+
LibraryEntry(
|
|
107
|
+
"kafka",
|
|
108
|
+
"message_queue",
|
|
109
|
+
"Kafka (kafka-python)",
|
|
110
|
+
("kafka.", "KafkaProducer", "KafkaConsumer"),
|
|
111
|
+
),
|
|
112
|
+
LibraryEntry("kombu", "message_queue", "Kombu", ("kombu.",)),
|
|
113
|
+
LibraryEntry("rq", "message_queue", "RQ (Redis Queue)", ("rq.", "Queue(")),
|
|
114
|
+
LibraryEntry("dramatiq", "message_queue", "Dramatiq", ("dramatiq.",)),
|
|
115
|
+
LibraryEntry("nats", "message_queue", "NATS", ("nats.",)),
|
|
116
|
+
# ── HTTP clients ───────────────────────────────────────────────────
|
|
117
|
+
LibraryEntry("httpx", "http_client", "httpx", ("httpx.", "AsyncClient(", "Client(")),
|
|
118
|
+
LibraryEntry(
|
|
119
|
+
"requests", "http_client", "requests", ("requests.", ".get(", ".post(", ".put(", ".delete(")
|
|
120
|
+
),
|
|
121
|
+
LibraryEntry("aiohttp", "http_client", "aiohttp", ("aiohttp.", "ClientSession(")),
|
|
122
|
+
LibraryEntry("urllib3", "http_client", "urllib3", ("urllib3.",)),
|
|
123
|
+
LibraryEntry("urllib.request", "http_client", "urllib", ("urllib.request.",)),
|
|
124
|
+
# ── Cloud: AWS ─────────────────────────────────────────────────────
|
|
125
|
+
LibraryEntry("boto3", "cloud_service", "AWS", ("boto3.", ".client(", ".resource(")),
|
|
126
|
+
LibraryEntry("botocore", "cloud_service", "AWS (botocore)", ("botocore.",)),
|
|
127
|
+
# ── Cloud: GCP ─────────────────────────────────────────────────────
|
|
128
|
+
LibraryEntry("google.cloud", "cloud_service", "GCP", ("google.cloud.",)),
|
|
129
|
+
LibraryEntry("google.auth", "cloud_service", "GCP (Auth)", ("google.auth.",)),
|
|
130
|
+
# ── Cloud: Azure ───────────────────────────────────────────────────
|
|
131
|
+
LibraryEntry("azure", "cloud_service", "Azure", ("azure.",)),
|
|
132
|
+
# ── Serialization (security-sensitive) ─────────────────────────────
|
|
133
|
+
LibraryEntry("pickle", "serialization", "pickle", ("pickle.", "pickle.loads", "pickle.load")),
|
|
134
|
+
LibraryEntry("marshal", "serialization", "marshal", ("marshal.",)),
|
|
135
|
+
LibraryEntry("jsonpickle", "serialization", "jsonpickle", ("jsonpickle.",)),
|
|
136
|
+
LibraryEntry("dill", "serialization", "dill", ("dill.",)),
|
|
137
|
+
LibraryEntry("yaml", "serialization", "PyYAML", ("yaml.", "yaml.load", "yaml.safe_load")),
|
|
138
|
+
LibraryEntry("ruamel", "serialization", "ruamel.yaml", ("ruamel.",)),
|
|
139
|
+
# ── XML (XXE-relevant) ─────────────────────────────────────────────
|
|
140
|
+
LibraryEntry(
|
|
141
|
+
"xml.etree", "xml_parser", "xml.etree", ("etree.", "ElementTree.", "parse(", "fromstring(")
|
|
142
|
+
),
|
|
143
|
+
LibraryEntry("lxml", "xml_parser", "lxml", ("lxml.", "etree.")),
|
|
144
|
+
LibraryEntry("defusedxml", "xml_parser", "defusedxml (safe)", ("defusedxml.",)),
|
|
145
|
+
LibraryEntry("xml.sax", "xml_parser", "xml.sax", ("xml.sax.",)),
|
|
146
|
+
LibraryEntry("xml.dom", "xml_parser", "xml.dom", ("xml.dom.",)),
|
|
147
|
+
# ── Templating (SSTI-relevant) ─────────────────────────────────────
|
|
148
|
+
LibraryEntry("jinja2", "template_engine", "Jinja2", ("jinja2.", "Template(", "Environment(")),
|
|
149
|
+
LibraryEntry("mako", "template_engine", "Mako", ("mako.",)),
|
|
150
|
+
LibraryEntry("chameleon", "template_engine", "Chameleon", ("chameleon.",)),
|
|
151
|
+
# ── Cryptography ───────────────────────────────────────────────────
|
|
152
|
+
LibraryEntry("cryptography", "crypto", "cryptography", ("cryptography.",)),
|
|
153
|
+
LibraryEntry("Crypto", "crypto", "PyCryptodome", ("Crypto.",)),
|
|
154
|
+
LibraryEntry("hashlib", "crypto", "hashlib", ("hashlib.", "hashlib.md5", "hashlib.sha")),
|
|
155
|
+
LibraryEntry("hmac", "crypto", "hmac", ("hmac.",)),
|
|
156
|
+
LibraryEntry("bcrypt", "crypto", "bcrypt", ("bcrypt.",)),
|
|
157
|
+
LibraryEntry("passlib", "crypto", "passlib", ("passlib.",)),
|
|
158
|
+
LibraryEntry("jwt", "crypto", "PyJWT", ("jwt.", "jwt.decode", "jwt.encode")),
|
|
159
|
+
LibraryEntry("jose", "crypto", "python-jose", ("jose.",)),
|
|
160
|
+
LibraryEntry("itsdangerous", "crypto", "itsdangerous", ("itsdangerous.",)),
|
|
161
|
+
# ── LDAP ───────────────────────────────────────────────────────────
|
|
162
|
+
LibraryEntry(
|
|
163
|
+
"ldap3", "directory_service", "LDAP (ldap3)", ("ldap3.", "Connection(", "Server(")
|
|
164
|
+
),
|
|
165
|
+
LibraryEntry("ldap", "directory_service", "LDAP (python-ldap)", ("ldap.",)),
|
|
166
|
+
# ── Email ──────────────────────────────────────────────────────────
|
|
167
|
+
LibraryEntry("smtplib", "email", "SMTP", ("smtplib.", "SMTP(")),
|
|
168
|
+
LibraryEntry("sendgrid", "email", "SendGrid", ("sendgrid.",)),
|
|
169
|
+
LibraryEntry("mailgun", "email", "Mailgun", ("mailgun.",)),
|
|
170
|
+
# ── File system ────────────────────────────────────────────────────
|
|
171
|
+
LibraryEntry("pathlib", "file_system", "pathlib", ()), # too common for call tracking
|
|
172
|
+
LibraryEntry("shutil", "file_system", "shutil", ("shutil.",)),
|
|
173
|
+
# ── Subprocess / OS (command injection relevant) ───────────────────
|
|
174
|
+
LibraryEntry(
|
|
175
|
+
"subprocess", "os_command", "subprocess", ("subprocess.", "Popen(", "run(", "call(")
|
|
176
|
+
),
|
|
177
|
+
LibraryEntry("os", "os_command", "os (system/exec)", ("os.system", "os.popen", "os.exec")),
|
|
178
|
+
# ── Monitoring / Logging ───────────────────────────────────────────
|
|
179
|
+
LibraryEntry("sentry_sdk", "monitoring", "Sentry", ("sentry_sdk.",)),
|
|
180
|
+
LibraryEntry("structlog", "logging", "structlog", ("structlog.",)),
|
|
181
|
+
LibraryEntry("loguru", "logging", "loguru", ("loguru.",)),
|
|
182
|
+
# ── Auth providers ─────────────────────────────────────────────────
|
|
183
|
+
LibraryEntry("authlib", "auth_provider", "Authlib", ("authlib.",)),
|
|
184
|
+
# ==========================================================================
|
|
185
|
+
# JVM / Java libraries
|
|
186
|
+
# ==========================================================================
|
|
187
|
+
# ── JVM Databases ──────────────────────────────────────────────────
|
|
188
|
+
# JPA / Hibernate (ORM)
|
|
189
|
+
LibraryEntry(
|
|
190
|
+
"javax.persistence",
|
|
191
|
+
"database",
|
|
192
|
+
"JPA",
|
|
193
|
+
("entityManager.", "em.", "createQuery", "persist(", "merge("),
|
|
194
|
+
),
|
|
195
|
+
LibraryEntry(
|
|
196
|
+
"jakarta.persistence",
|
|
197
|
+
"database",
|
|
198
|
+
"JPA",
|
|
199
|
+
("entityManager.", "em.", "createQuery", "persist(", "merge("),
|
|
200
|
+
),
|
|
201
|
+
LibraryEntry(
|
|
202
|
+
"org.hibernate",
|
|
203
|
+
"database",
|
|
204
|
+
"Hibernate ORM",
|
|
205
|
+
("session.", "sessionFactory.", "HibernateTemplate"),
|
|
206
|
+
),
|
|
207
|
+
LibraryEntry(
|
|
208
|
+
"org.springframework.data",
|
|
209
|
+
"database",
|
|
210
|
+
"Spring Data",
|
|
211
|
+
("repository.", "findById", "findAll", "save(", "deleteById"),
|
|
212
|
+
),
|
|
213
|
+
LibraryEntry(
|
|
214
|
+
"org.jooq",
|
|
215
|
+
"database",
|
|
216
|
+
"jOOQ",
|
|
217
|
+
("dslContext.", "DSL.", ".select(", ".insertInto(", ".update("),
|
|
218
|
+
),
|
|
219
|
+
LibraryEntry("org.mybatis", "database", "MyBatis", ("sqlSession.", "mapper.")),
|
|
220
|
+
LibraryEntry("io.r2dbc", "database", "R2DBC", ("connection.", "statement.", "r2dbc")),
|
|
221
|
+
# JDBC drivers
|
|
222
|
+
LibraryEntry(
|
|
223
|
+
"org.postgresql", "database", "PostgreSQL (JDBC)", ("DriverManager.", "prepareStatement(")
|
|
224
|
+
),
|
|
225
|
+
LibraryEntry("com.mysql", "database", "MySQL (JDBC)", ("DriverManager.", "prepareStatement(")),
|
|
226
|
+
LibraryEntry("com.h2database", "database", "H2 (in-memory)", ("H2ConsoleServlet",)),
|
|
227
|
+
LibraryEntry("org.hsqldb", "database", "HSQLDB", ()),
|
|
228
|
+
LibraryEntry("com.zaxxer.hikari", "database", "HikariCP", ("HikariDataSource", "HikariConfig")),
|
|
229
|
+
LibraryEntry("org.apache.commons.dbcp", "database", "Apache DBCP", ("BasicDataSource",)),
|
|
230
|
+
# NoSQL
|
|
231
|
+
LibraryEntry(
|
|
232
|
+
"com.mongodb",
|
|
233
|
+
"database",
|
|
234
|
+
"MongoDB (Java Driver)",
|
|
235
|
+
("MongoClient", "collection.", "find(", "insertOne(", "updateOne("),
|
|
236
|
+
),
|
|
237
|
+
LibraryEntry(
|
|
238
|
+
"org.springframework.data.mongodb",
|
|
239
|
+
"database",
|
|
240
|
+
"Spring Data MongoDB",
|
|
241
|
+
("mongoTemplate.", "repository."),
|
|
242
|
+
),
|
|
243
|
+
LibraryEntry("com.datastax", "database", "Cassandra", ("CqlSession", "session.execute(")),
|
|
244
|
+
LibraryEntry(
|
|
245
|
+
"redis.clients.jedis", "cache", "Redis (Jedis)", ("jedis.", "Jedis(", ".get(", ".set(")
|
|
246
|
+
),
|
|
247
|
+
LibraryEntry(
|
|
248
|
+
"io.lettuce.core", "cache", "Redis (Lettuce)", ("RedisClient", "connection.", "commands.")
|
|
249
|
+
),
|
|
250
|
+
LibraryEntry("net.spy.memcached", "cache", "Memcached (spymemcached)", ("MemcachedClient",)),
|
|
251
|
+
# ── JVM Message queues ─────────────────────────────────────────────
|
|
252
|
+
LibraryEntry(
|
|
253
|
+
"org.apache.kafka",
|
|
254
|
+
"message_queue",
|
|
255
|
+
"Kafka",
|
|
256
|
+
("KafkaProducer", "KafkaConsumer", "producer.send(", "consumer.poll("),
|
|
257
|
+
),
|
|
258
|
+
LibraryEntry(
|
|
259
|
+
"org.springframework.kafka",
|
|
260
|
+
"message_queue",
|
|
261
|
+
"Kafka (Spring)",
|
|
262
|
+
("KafkaTemplate", "kafkaTemplate.", "@KafkaListener"),
|
|
263
|
+
),
|
|
264
|
+
LibraryEntry(
|
|
265
|
+
"com.rabbitmq",
|
|
266
|
+
"message_queue",
|
|
267
|
+
"RabbitMQ (AMQP)",
|
|
268
|
+
("channel.", "ConnectionFactory", "basicPublish(", "basicConsume("),
|
|
269
|
+
),
|
|
270
|
+
LibraryEntry(
|
|
271
|
+
"org.springframework.amqp",
|
|
272
|
+
"message_queue",
|
|
273
|
+
"RabbitMQ (Spring)",
|
|
274
|
+
("rabbitTemplate.", "RabbitTemplate", "@RabbitListener"),
|
|
275
|
+
),
|
|
276
|
+
LibraryEntry(
|
|
277
|
+
"io.nats.client",
|
|
278
|
+
"message_queue",
|
|
279
|
+
"NATS",
|
|
280
|
+
("Nats.connect", "connection.publish(", "connection.subscribe("),
|
|
281
|
+
),
|
|
282
|
+
LibraryEntry(
|
|
283
|
+
"javax.jms",
|
|
284
|
+
"message_queue",
|
|
285
|
+
"JMS",
|
|
286
|
+
("jmsTemplate.", "session.createQueue(", "producer.send("),
|
|
287
|
+
),
|
|
288
|
+
LibraryEntry(
|
|
289
|
+
"jakarta.jms",
|
|
290
|
+
"message_queue",
|
|
291
|
+
"JMS",
|
|
292
|
+
("jmsTemplate.", "session.createQueue(", "producer.send("),
|
|
293
|
+
),
|
|
294
|
+
LibraryEntry(
|
|
295
|
+
"org.apache.activemq", "message_queue", "ActiveMQ", ("ActiveMQConnectionFactory",)
|
|
296
|
+
),
|
|
297
|
+
LibraryEntry(
|
|
298
|
+
"com.amazonaws.services.sqs",
|
|
299
|
+
"message_queue",
|
|
300
|
+
"AWS SQS",
|
|
301
|
+
("SqsClient", "sendMessage(", "receiveMessage("),
|
|
302
|
+
),
|
|
303
|
+
# ── JVM HTTP clients ───────────────────────────────────────────────
|
|
304
|
+
LibraryEntry(
|
|
305
|
+
"org.springframework.web.client",
|
|
306
|
+
"http_client",
|
|
307
|
+
"Spring RestTemplate",
|
|
308
|
+
("restTemplate.", "exchange(", "getForObject(", "postForObject("),
|
|
309
|
+
),
|
|
310
|
+
LibraryEntry(
|
|
311
|
+
"org.springframework.web.reactive.function.client",
|
|
312
|
+
"http_client",
|
|
313
|
+
"Spring WebClient",
|
|
314
|
+
("WebClient", "webClient.", ".retrieve(", ".bodyToMono("),
|
|
315
|
+
),
|
|
316
|
+
LibraryEntry(
|
|
317
|
+
"com.squareup.okhttp3",
|
|
318
|
+
"http_client",
|
|
319
|
+
"OkHttp",
|
|
320
|
+
("OkHttpClient", "okHttpClient.", "newCall(", ".execute("),
|
|
321
|
+
),
|
|
322
|
+
LibraryEntry(
|
|
323
|
+
"org.apache.http",
|
|
324
|
+
"http_client",
|
|
325
|
+
"Apache HttpClient",
|
|
326
|
+
("HttpClient", "httpClient.", "execute(", "CloseableHttpClient"),
|
|
327
|
+
),
|
|
328
|
+
LibraryEntry(
|
|
329
|
+
"org.apache.hc",
|
|
330
|
+
"http_client",
|
|
331
|
+
"Apache HttpClient 5",
|
|
332
|
+
("HttpClient", "CloseableHttpClient", "execute("),
|
|
333
|
+
),
|
|
334
|
+
LibraryEntry("feign", "http_client", "OpenFeign", ("Feign.builder(", "@FeignClient")),
|
|
335
|
+
LibraryEntry("retrofit2", "http_client", "Retrofit", ("Retrofit.Builder(", "retrofit.create(")),
|
|
336
|
+
LibraryEntry(
|
|
337
|
+
"java.net.http",
|
|
338
|
+
"http_client",
|
|
339
|
+
"Java HttpClient",
|
|
340
|
+
("HttpClient.newHttpClient(", "HttpRequest.newBuilder(", "client.send("),
|
|
341
|
+
),
|
|
342
|
+
# ── JVM Cloud: AWS ─────────────────────────────────────────────────
|
|
343
|
+
LibraryEntry(
|
|
344
|
+
"com.amazonaws",
|
|
345
|
+
"cloud_service",
|
|
346
|
+
"AWS SDK v1",
|
|
347
|
+
("AmazonS3", "AmazonDynamoDB", "AWSLambda", "AmazonSNS", "AmazonSQS"),
|
|
348
|
+
),
|
|
349
|
+
LibraryEntry(
|
|
350
|
+
"software.amazon.awssdk",
|
|
351
|
+
"cloud_service",
|
|
352
|
+
"AWS SDK v2",
|
|
353
|
+
("S3Client", "DynamoDbClient", "LambdaClient", "SnsClient", "SqsClient"),
|
|
354
|
+
),
|
|
355
|
+
# ── JVM Cloud: GCP ─────────────────────────────────────────────────
|
|
356
|
+
LibraryEntry(
|
|
357
|
+
"com.google.cloud",
|
|
358
|
+
"cloud_service",
|
|
359
|
+
"GCP Java SDK",
|
|
360
|
+
("Storage", "Firestore", "BigQuery", "PubSub", "Spanner"),
|
|
361
|
+
),
|
|
362
|
+
LibraryEntry("com.google.auth", "cloud_service", "GCP Auth", ("GoogleCredentials",)),
|
|
363
|
+
# ── JVM Cloud: Azure ───────────────────────────────────────────────
|
|
364
|
+
LibraryEntry(
|
|
365
|
+
"com.azure",
|
|
366
|
+
"cloud_service",
|
|
367
|
+
"Azure SDK",
|
|
368
|
+
("BlobClient", "QueueClient", "SecretClient", "CosmosClient"),
|
|
369
|
+
),
|
|
370
|
+
LibraryEntry("com.microsoft.azure", "cloud_service", "Azure SDK (legacy)", ()),
|
|
371
|
+
# ── JVM Serialization / XML ────────────────────────────────────────
|
|
372
|
+
LibraryEntry(
|
|
373
|
+
"com.fasterxml.jackson",
|
|
374
|
+
"serialization",
|
|
375
|
+
"Jackson",
|
|
376
|
+
("objectMapper.", "ObjectMapper(", "readValue(", "writeValueAsString("),
|
|
377
|
+
),
|
|
378
|
+
LibraryEntry(
|
|
379
|
+
"com.google.gson", "serialization", "Gson", ("gson.", "Gson(", "fromJson(", "toJson(")
|
|
380
|
+
),
|
|
381
|
+
LibraryEntry(
|
|
382
|
+
"org.json", "serialization", "JSON.org", ("JSONObject(", "JSONArray(", "new JSONObject")
|
|
383
|
+
),
|
|
384
|
+
LibraryEntry(
|
|
385
|
+
"com.fasterxml.jackson.dataformat.xml", "xml_parser", "Jackson XML", ("XmlMapper",)
|
|
386
|
+
),
|
|
387
|
+
LibraryEntry(
|
|
388
|
+
"javax.xml",
|
|
389
|
+
"xml_parser",
|
|
390
|
+
"JAXB / Java XML",
|
|
391
|
+
("JAXBContext", "Marshaller", "Unmarshaller", "DocumentBuilder"),
|
|
392
|
+
),
|
|
393
|
+
LibraryEntry(
|
|
394
|
+
"jakarta.xml",
|
|
395
|
+
"xml_parser",
|
|
396
|
+
"JAXB / Jakarta XML",
|
|
397
|
+
("JAXBContext", "Marshaller", "Unmarshaller"),
|
|
398
|
+
),
|
|
399
|
+
LibraryEntry(
|
|
400
|
+
"org.w3c.dom", "xml_parser", "W3C DOM", ("DocumentBuilder", "Document", "Element")
|
|
401
|
+
),
|
|
402
|
+
LibraryEntry(
|
|
403
|
+
"javax.xml.parsers",
|
|
404
|
+
"xml_parser",
|
|
405
|
+
"SAX/DOM Parser",
|
|
406
|
+
("DocumentBuilderFactory", "SAXParserFactory"),
|
|
407
|
+
),
|
|
408
|
+
# ── JVM Cryptography ───────────────────────────────────────────────
|
|
409
|
+
LibraryEntry(
|
|
410
|
+
"javax.crypto",
|
|
411
|
+
"crypto",
|
|
412
|
+
"Java Crypto (JCE)",
|
|
413
|
+
("Cipher.", "SecretKey", "KeyGenerator", "Mac.", "MessageDigest"),
|
|
414
|
+
),
|
|
415
|
+
LibraryEntry(
|
|
416
|
+
"java.security",
|
|
417
|
+
"crypto",
|
|
418
|
+
"Java Security (JCA)",
|
|
419
|
+
("MessageDigest", "Signature", "KeyPair", "KeyStore"),
|
|
420
|
+
),
|
|
421
|
+
LibraryEntry(
|
|
422
|
+
"org.bouncycastle", "crypto", "Bouncy Castle", ("BCProvider", "Cipher", "KeyPairGenerator")
|
|
423
|
+
),
|
|
424
|
+
LibraryEntry(
|
|
425
|
+
"io.jsonwebtoken", "crypto", "jjwt", ("Jwts.", "JwtBuilder", "parseClaimsJws(", "signWith(")
|
|
426
|
+
),
|
|
427
|
+
LibraryEntry(
|
|
428
|
+
"com.nimbusds",
|
|
429
|
+
"crypto",
|
|
430
|
+
"Nimbus JOSE+JWT",
|
|
431
|
+
("JWTParser", "SignedJWT", "JWSObject", "NimbusJwtDecoder"),
|
|
432
|
+
),
|
|
433
|
+
LibraryEntry(
|
|
434
|
+
"org.springframework.security.crypto",
|
|
435
|
+
"crypto",
|
|
436
|
+
"Spring Security Crypto",
|
|
437
|
+
("BCryptPasswordEncoder", "passwordEncoder.", "encode(", "matches("),
|
|
438
|
+
),
|
|
439
|
+
# ── JVM LDAP / Directory ───────────────────────────────────────────
|
|
440
|
+
LibraryEntry(
|
|
441
|
+
"org.springframework.ldap",
|
|
442
|
+
"directory_service",
|
|
443
|
+
"Spring LDAP",
|
|
444
|
+
("ldapTemplate.", "LdapTemplate"),
|
|
445
|
+
),
|
|
446
|
+
LibraryEntry(
|
|
447
|
+
"javax.naming",
|
|
448
|
+
"directory_service",
|
|
449
|
+
"JNDI / LDAP",
|
|
450
|
+
("InitialDirContext", "DirContext", "NamingEnumeration"),
|
|
451
|
+
),
|
|
452
|
+
# ── JVM Email ──────────────────────────────────────────────────────
|
|
453
|
+
LibraryEntry(
|
|
454
|
+
"javax.mail", "email", "JavaMail", ("Session.getInstance", "Transport.send(", "MimeMessage")
|
|
455
|
+
),
|
|
456
|
+
LibraryEntry(
|
|
457
|
+
"jakarta.mail",
|
|
458
|
+
"email",
|
|
459
|
+
"Jakarta Mail",
|
|
460
|
+
("Session.getInstance", "Transport.send(", "MimeMessage"),
|
|
461
|
+
),
|
|
462
|
+
LibraryEntry(
|
|
463
|
+
"org.springframework.mail",
|
|
464
|
+
"email",
|
|
465
|
+
"Spring Mail",
|
|
466
|
+
("mailSender.", "JavaMailSender", "SimpleMailMessage"),
|
|
467
|
+
),
|
|
468
|
+
LibraryEntry("com.sendgrid", "email", "SendGrid Java", ("SendGrid(", "sg.api(", "Mail(")),
|
|
469
|
+
# ── JVM File / OS ──────────────────────────────────────────────────
|
|
470
|
+
LibraryEntry(
|
|
471
|
+
"java.lang.Runtime", "os_command", "Runtime.exec", ("Runtime.getRuntime(", ".exec(")
|
|
472
|
+
),
|
|
473
|
+
LibraryEntry(
|
|
474
|
+
"java.lang.ProcessBuilder", "os_command", "ProcessBuilder", ("ProcessBuilder(", ".start(")
|
|
475
|
+
),
|
|
476
|
+
LibraryEntry("java.nio.file", "file_system", "Java NIO File", ("Files.", "Paths.", "Path.of(")),
|
|
477
|
+
LibraryEntry(
|
|
478
|
+
"java.io",
|
|
479
|
+
"file_system",
|
|
480
|
+
"Java IO",
|
|
481
|
+
("FileInputStream", "FileOutputStream", "BufferedReader", "FileWriter"),
|
|
482
|
+
),
|
|
483
|
+
# ── JVM Monitoring / Logging ───────────────────────────────────────
|
|
484
|
+
LibraryEntry(
|
|
485
|
+
"io.micrometer",
|
|
486
|
+
"monitoring",
|
|
487
|
+
"Micrometer",
|
|
488
|
+
("meterRegistry.", "Counter.", "Timer.", "Gauge."),
|
|
489
|
+
),
|
|
490
|
+
LibraryEntry(
|
|
491
|
+
"io.opentelemetry", "monitoring", "OpenTelemetry", ("Tracer", "Span", "openTelemetry.")
|
|
492
|
+
),
|
|
493
|
+
LibraryEntry(
|
|
494
|
+
"io.sentry", "monitoring", "Sentry", ("Sentry.captureException(", "Sentry.captureMessage(")
|
|
495
|
+
),
|
|
496
|
+
LibraryEntry("org.slf4j", "logging", "SLF4J", ("LoggerFactory.getLogger(", "log.", "logger.")),
|
|
497
|
+
LibraryEntry(
|
|
498
|
+
"org.apache.logging.log4j",
|
|
499
|
+
"logging",
|
|
500
|
+
"Log4j2",
|
|
501
|
+
("LogManager.getLogger(", "logger.", "log4j"),
|
|
502
|
+
),
|
|
503
|
+
LibraryEntry("ch.qos.logback", "logging", "Logback", ("LoggerFactory.getLogger(",)),
|
|
504
|
+
# ── JVM Auth providers ─────────────────────────────────────────────
|
|
505
|
+
LibraryEntry(
|
|
506
|
+
"org.springframework.security",
|
|
507
|
+
"auth_provider",
|
|
508
|
+
"Spring Security",
|
|
509
|
+
("SecurityContextHolder", "Authentication", "UserDetails", "WebSecurityConfigurerAdapter"),
|
|
510
|
+
),
|
|
511
|
+
LibraryEntry("com.okta", "auth_provider", "Okta Java SDK", ("OktaClient", "okta.")),
|
|
512
|
+
LibraryEntry("com.auth0", "auth_provider", "Auth0 Java SDK", ("AuthAPI", "mgmtAPI.", "auth0.")),
|
|
513
|
+
# ── JavaScript / TypeScript — Databases ───────────────────────────────
|
|
514
|
+
# Call prefixes use library-specific tokens to avoid collisions;
|
|
515
|
+
# generic names like "query(", "db.", "Client(" are omitted.
|
|
516
|
+
LibraryEntry("mongoose", "database", "MongoDB (Mongoose)", ("mongoose.", "mongoose.model(")),
|
|
517
|
+
LibraryEntry(
|
|
518
|
+
"mongodb", "database", "MongoDB (Node driver)", ("MongoClient(", "collection.find(")
|
|
519
|
+
),
|
|
520
|
+
LibraryEntry("pg", "database", "PostgreSQL (node-postgres)", ("pg.Pool(", "pg.Client(")),
|
|
521
|
+
LibraryEntry("mysql", "database", "MySQL (mysql)", ("mysql.createConnection(",)),
|
|
522
|
+
LibraryEntry("mysql2", "database", "MySQL (mysql2)", ("mysql2.createConnection(",)),
|
|
523
|
+
LibraryEntry(
|
|
524
|
+
"mssql", "database", "MS SQL Server (mssql)", ("mssql.", "sql.connect(", "ConnectionPool(")
|
|
525
|
+
),
|
|
526
|
+
LibraryEntry(
|
|
527
|
+
"tedious", "database", "MS SQL Server (tedious)", ("tedious.", "Connection(", "Request(")
|
|
528
|
+
),
|
|
529
|
+
LibraryEntry("oracledb", "database", "Oracle DB", ("oracledb.", "getConnection(", "execute(")),
|
|
530
|
+
LibraryEntry("sqlite3", "database", "SQLite (node)", ("db.run(", "db.get(", "db.all(")),
|
|
531
|
+
# Note: "redis" npm package is already covered by the Python redis entry above.
|
|
532
|
+
LibraryEntry("ioredis", "cache", "Redis (ioredis)", ("ioredis.", "new Redis(")),
|
|
533
|
+
LibraryEntry(
|
|
534
|
+
"@prisma/client", "database", "Prisma ORM", ("PrismaClient(", "prisma.", "prisma.$connect(")
|
|
535
|
+
),
|
|
536
|
+
LibraryEntry("typeorm", "database", "TypeORM", ("DataSource(", "typeorm.", "getRepository(")),
|
|
537
|
+
LibraryEntry("sequelize", "database", "Sequelize", ("Sequelize(", "sequelize.define(")),
|
|
538
|
+
LibraryEntry("knex", "database", "Knex.js", ("knex(", "knex.schema.", "trx.")),
|
|
539
|
+
# ── JavaScript / TypeScript — HTTP Clients ────────────────────────────
|
|
540
|
+
# Avoid generic prefixes like "get(", "post(", "request(", "fetch(" — use
|
|
541
|
+
# library-specific tokens only.
|
|
542
|
+
LibraryEntry("axios", "http_client", "Axios", ("axios.", "axios.create(")),
|
|
543
|
+
LibraryEntry("node-fetch", "http_client", "node-fetch", ("node-fetch",)),
|
|
544
|
+
LibraryEntry("got", "http_client", "Got", ("got.get(", "got.post(", "got.extend(")),
|
|
545
|
+
LibraryEntry("superagent", "http_client", "SuperAgent", ("superagent.",)),
|
|
546
|
+
LibraryEntry("undici", "http_client", "Undici", ("undici.", "undici.fetch(")),
|
|
547
|
+
LibraryEntry(
|
|
548
|
+
"@nestjs/axios", "http_client", "NestJS HttpModule", ("httpService.", "httpService.get(")
|
|
549
|
+
),
|
|
550
|
+
# ── JavaScript / TypeScript — Message Queues ──────────────────────────
|
|
551
|
+
LibraryEntry(
|
|
552
|
+
"kafkajs",
|
|
553
|
+
"message_queue",
|
|
554
|
+
"Kafka (kafkajs)",
|
|
555
|
+
("Kafka(", "kafka.producer(", "kafka.consumer("),
|
|
556
|
+
),
|
|
557
|
+
LibraryEntry(
|
|
558
|
+
"amqplib",
|
|
559
|
+
"message_queue",
|
|
560
|
+
"RabbitMQ (amqplib)",
|
|
561
|
+
("amqplib.connect(", "channel.assertQueue("),
|
|
562
|
+
),
|
|
563
|
+
LibraryEntry("bull", "message_queue", "Bull Queue", ("new Queue(", "queue.process(")),
|
|
564
|
+
LibraryEntry("bullmq", "message_queue", "BullMQ", ("new Queue(", "new Worker(")),
|
|
565
|
+
LibraryEntry("@nestjs/bull", "message_queue", "NestJS Bull", ("InjectQueue(", "InjectQueue")),
|
|
566
|
+
# ── JavaScript / TypeScript — Cloud ───────────────────────────────────
|
|
567
|
+
LibraryEntry(
|
|
568
|
+
"aws-sdk", "cloud_service", "AWS SDK v2", ("AWS.", "new AWS.S3(", "new AWS.DynamoDB(")
|
|
569
|
+
),
|
|
570
|
+
LibraryEntry(
|
|
571
|
+
"@aws-sdk/client-s3", "cloud_service", "AWS S3", ("S3Client(", "PutObjectCommand(")
|
|
572
|
+
),
|
|
573
|
+
LibraryEntry(
|
|
574
|
+
"@aws-sdk/client-dynamodb",
|
|
575
|
+
"cloud_service",
|
|
576
|
+
"AWS DynamoDB",
|
|
577
|
+
("DynamoDBClient(", "GetItemCommand("),
|
|
578
|
+
),
|
|
579
|
+
LibraryEntry(
|
|
580
|
+
"@aws-sdk/lib-dynamodb",
|
|
581
|
+
"cloud_service",
|
|
582
|
+
"AWS DynamoDB (DocumentClient)",
|
|
583
|
+
("DynamoDBDocumentClient(", "GetCommand("),
|
|
584
|
+
),
|
|
585
|
+
LibraryEntry(
|
|
586
|
+
"@aws-sdk/client-sqs", "cloud_service", "AWS SQS", ("SQSClient(", "SendMessageCommand(")
|
|
587
|
+
),
|
|
588
|
+
LibraryEntry(
|
|
589
|
+
"@aws-sdk/client-secretsmanager",
|
|
590
|
+
"cloud_service",
|
|
591
|
+
"AWS Secrets Manager",
|
|
592
|
+
("SecretsManagerClient(", "GetSecretValueCommand("),
|
|
593
|
+
),
|
|
594
|
+
LibraryEntry("@google-cloud/storage", "cloud_service", "GCS", ("Storage(", "bucket(", "gcs.")),
|
|
595
|
+
LibraryEntry(
|
|
596
|
+
"@azure/storage-blob",
|
|
597
|
+
"cloud_service",
|
|
598
|
+
"Azure Blob",
|
|
599
|
+
("BlobServiceClient(", "containerClient."),
|
|
600
|
+
),
|
|
601
|
+
# ── JavaScript / TypeScript — Email / Notification ────────────────────
|
|
602
|
+
LibraryEntry(
|
|
603
|
+
"nodemailer",
|
|
604
|
+
"email",
|
|
605
|
+
"Nodemailer",
|
|
606
|
+
("createTransport(", "transporter.", "transporter.sendMail("),
|
|
607
|
+
),
|
|
608
|
+
LibraryEntry("@sendgrid/mail", "email", "SendGrid (JS)", ("sgMail.", "sgMail.send(")),
|
|
609
|
+
LibraryEntry("twilio", "sms", "Twilio", ("Twilio(", "client.messages.create(")),
|
|
610
|
+
LibraryEntry("stripe", "payments", "Stripe", ("Stripe(", "stripe.", "stripe.charges.")),
|
|
611
|
+
# ── JavaScript / TypeScript — Auth Providers ──────────────────────────
|
|
612
|
+
LibraryEntry(
|
|
613
|
+
"passport",
|
|
614
|
+
"auth_provider",
|
|
615
|
+
"Passport.js",
|
|
616
|
+
("passport.", "passport.use(", "passport.authenticate("),
|
|
617
|
+
),
|
|
618
|
+
LibraryEntry("passport-jwt", "auth_provider", "Passport JWT", ("JwtStrategy(", "ExtractJwt.")),
|
|
619
|
+
LibraryEntry("passport-saml", "auth_provider", "Passport SAML", ("SamlStrategy(",)),
|
|
620
|
+
LibraryEntry(
|
|
621
|
+
"@nestjs/jwt", "auth_provider", "NestJS JWT", ("JwtService(", "jwtService.", "JwtModule.")
|
|
622
|
+
),
|
|
623
|
+
LibraryEntry(
|
|
624
|
+
"@nestjs/passport", "auth_provider", "NestJS Passport", ("AuthGuard(", "PassportStrategy(")
|
|
625
|
+
),
|
|
626
|
+
LibraryEntry(
|
|
627
|
+
"jsonwebtoken",
|
|
628
|
+
"auth_provider",
|
|
629
|
+
"JWT (jsonwebtoken)",
|
|
630
|
+
("jwt.sign(", "jwt.verify(", "jwt.decode("),
|
|
631
|
+
),
|
|
632
|
+
LibraryEntry(
|
|
633
|
+
"openid-client",
|
|
634
|
+
"auth_provider",
|
|
635
|
+
"OIDC (openid-client)",
|
|
636
|
+
("Issuer.", "Client(", "generators."),
|
|
637
|
+
),
|
|
638
|
+
LibraryEntry("keycloak-connect", "auth_provider", "Keycloak", ("Keycloak(", "keycloak.")),
|
|
639
|
+
LibraryEntry(
|
|
640
|
+
"@auth0/auth0-spa-js", "auth_provider", "Auth0 JS SDK", ("Auth0Client(", "auth0.")
|
|
641
|
+
),
|
|
642
|
+
LibraryEntry(
|
|
643
|
+
"firebase-admin",
|
|
644
|
+
"auth_provider",
|
|
645
|
+
"Firebase Admin",
|
|
646
|
+
("admin.", "admin.auth()", "verifyIdToken("),
|
|
647
|
+
),
|
|
648
|
+
# ── JavaScript / TypeScript — Search ──────────────────────────────────
|
|
649
|
+
LibraryEntry(
|
|
650
|
+
"@elastic/elasticsearch",
|
|
651
|
+
"search",
|
|
652
|
+
"Elasticsearch (JS)",
|
|
653
|
+
("client.search(", "client.index(", "new Client("),
|
|
654
|
+
),
|
|
655
|
+
LibraryEntry("algoliasearch", "search", "Algolia", ("algoliasearch(", "index.search(")),
|
|
656
|
+
# ── JavaScript / TypeScript — Logging / Monitoring ────────────────────
|
|
657
|
+
LibraryEntry("pino", "logging", "Pino", ("pino(", "logger.info(", "logger.error(")),
|
|
658
|
+
LibraryEntry(
|
|
659
|
+
"winston", "logging", "Winston", ("winston.createLogger(", "logger.info(", "transports.")
|
|
660
|
+
),
|
|
661
|
+
LibraryEntry("bunyan", "logging", "Bunyan", ("bunyan.createLogger(", "log.info(")),
|
|
662
|
+
LibraryEntry(
|
|
663
|
+
"@sentry/node", "monitoring", "Sentry (Node)", ("Sentry.init(", "Sentry.captureException(")
|
|
664
|
+
),
|
|
665
|
+
LibraryEntry("dd-trace", "monitoring", "Datadog APM", ("tracer.", "dd-trace")),
|
|
666
|
+
# ── JavaScript / TypeScript — GraphQL ─────────────────────────────────
|
|
667
|
+
LibraryEntry(
|
|
668
|
+
"graphql", "api_framework", "GraphQL.js", ("graphql(", "buildSchema(", "graphqlHTTP(")
|
|
669
|
+
),
|
|
670
|
+
LibraryEntry(
|
|
671
|
+
"apollo-server", "api_framework", "Apollo Server", ("ApolloServer(", "gql`", "typeDefs")
|
|
672
|
+
),
|
|
673
|
+
LibraryEntry(
|
|
674
|
+
"@nestjs/graphql",
|
|
675
|
+
"api_framework",
|
|
676
|
+
"NestJS GraphQL",
|
|
677
|
+
("GraphQLModule.", "@Resolver", "@ObjectType"),
|
|
678
|
+
),
|
|
679
|
+
# ── JavaScript / TypeScript — File / Storage ──────────────────────────
|
|
680
|
+
LibraryEntry(
|
|
681
|
+
"multer",
|
|
682
|
+
"file_system",
|
|
683
|
+
"Multer (file upload)",
|
|
684
|
+
("multer(", "upload.single(", "upload.array("),
|
|
685
|
+
),
|
|
686
|
+
LibraryEntry(
|
|
687
|
+
"@nestjs/platform-express",
|
|
688
|
+
"file_system",
|
|
689
|
+
"NestJS file handling",
|
|
690
|
+
("FileInterceptor(", "FilesInterceptor("),
|
|
691
|
+
),
|
|
692
|
+
]
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
# ============================================================================
|
|
696
|
+
# SDK usage-pattern catalog (for extracting service-specific info)
|
|
697
|
+
# ============================================================================
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
@dataclass(frozen=True)
class SdkUsagePattern:
    """Recipe for pulling a concrete service name out of an SDK init call.

    Attributes:
        library: Import pattern of the library the recipe belongs to; it is
            compared with the catalog's ``import_pattern`` values.
        call_pattern: Text searched for inside a call site's callee name
            (e.g. ``"boto3.client"``).
        service_arg_position: Index of the positional argument that carries
            the service name, or ``None`` when no position is declared.
        service_arg_keyword: Name of the keyword argument that carries the
            service name, or ``None`` when no keyword is declared.
        result_type_prefix: Optional label placed in front of the extracted
            service name when the display string is built (``"AWS s3"``).
    """

    library: str
    call_pattern: str
    service_arg_position: int | None = None
    service_arg_keyword: str | None = None
    result_type_prefix: str | None = None
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
_SDK_PATTERNS: list[SdkUsagePattern] = [
    # boto3.client("s3") / boto3.resource("s3"); the service may also be
    # passed as service_name="s3".
    *(
        SdkUsagePattern(
            library="boto3",
            call_pattern=factory,
            service_arg_position=0,
            service_arg_keyword="service_name",
            result_type_prefix="AWS",
        )
        for factory in ("boto3.client", "boto3.resource")
    ),
    # google.cloud client constructors, e.g. google.cloud.storage.Client().
    # NOTE(review): these entries declare neither a positional index nor a
    # keyword for the service argument — confirm they are meant to yield a
    # sub-service, since there is no argument to read one from.
    *(
        SdkUsagePattern(
            library="google.cloud",
            call_pattern=constructor,
            result_type_prefix="GCP",
        )
        for constructor in (
            "storage.Client",
            "bigquery.Client",
            "pubsub_v1.PublisherClient",
            "firestore.Client",
        )
    ),
]
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
# ============================================================================
|
|
736
|
+
# HTTP target extraction catalog
|
|
737
|
+
# ============================================================================
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
@dataclass(frozen=True)
class HttpMethodPattern:
    """An HTTP client call shape from which an outbound target can be read.

    Call sites are compared against these patterns so that the URL or
    hostname the application talks to can be extracted from the arguments.

    Attributes:
        callee_contains: Text that must occur inside the call site's callee
            name for the pattern to apply.
        url_arg_position: Zero-based index of the positional URL argument.
        url_keyword: Keyword argument name that may carry the URL instead.
        inferred_method: HTTP verb recorded for the call; an empty string
            means the verb has to be read from the call's first argument.
    """

    callee_contains: str
    url_arg_position: int
    url_keyword: str
    inferred_method: str
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
_HTTP_TARGET_PATTERNS: list[HttpMethodPattern] = [
    # requests and httpx expose the same module-level helpers: one function
    # per verb taking the URL first, plus request(method, url) where the URL
    # is the second argument and the verb is read from the first.
    *(
        pattern
        for module in ("requests", "httpx")
        for pattern in (
            *(
                HttpMethodPattern(f"{module}.{verb}", 0, "url", verb.upper())
                for verb in ("get", "post", "put", "delete", "patch", "head", "options")
            ),
            HttpMethodPattern(f"{module}.request", 1, "url", ""),
        )
    ),
    # urllib.request — qualified and bare spellings of urlopen
    HttpMethodPattern("urllib.request.urlopen", 0, "url", "GET"),
    HttpMethodPattern("urlopen", 0, "url", "GET"),
]
|
|
778
|
+
|
|
779
|
+
# Patterns matched by suffix only (for instance-method calls like
|
|
780
|
+
# ``session.get(url)``). These are only applied when the caller file
|
|
781
|
+
# has already been associated with a specific http_client library through
|
|
782
|
+
# Phase 1/2 detection, AND the receiver passes the blocklist check, AND
|
|
783
|
+
# the URL argument passes the plausibility filter.
|
|
784
|
+
_HTTP_INSTANCE_SUFFIXES: dict[str, str] = {
    # ".get" -> "GET", ".post" -> "POST", ... one entry per verb method
    **{
        f".{verb}": verb.upper()
        for verb in ("get", "post", "put", "delete", "patch", "head", "options")
    },
    # Generic form: the verb is not implied by the method name
    ".request": "",
}
|
|
794
|
+
|
|
795
|
+
# Receiver prefixes that indicate a framework/app/router object — not an
|
|
796
|
+
# HTTP client. Calls like ``app.get("/path")`` or ``router.post("/path")``
|
|
797
|
+
# are route registrations (often decorators), not outbound HTTP requests.
|
|
798
|
+
_RECEIVER_BLOCKLIST: set[str] = {
    # Bare framework objects
    "app",
    "router",
    "api",
    "api_router",
    # The same objects held as instance attributes ...
    *(f"self.{attr}" for attr in ("app", "router", "api_router")),
    # ... or as class attributes
    *(f"cls.{attr}" for attr in ("app", "router")),
}
|
|
809
|
+
|
|
810
|
+
# Broader patterns checked via prefix/suffix matching when the exact
|
|
811
|
+
# receiver isn't in the blocklist.
|
|
812
|
+
_RECEIVER_BLOCKLIST_PATTERNS: tuple[str, ...] = (
    # Generic naming fragments (e.g. "my_app", "users_router")
    ("_app", "_router")
    # Framework-flavoured names
    + ("blueprint", "flask_app", "fastapi_app")
)
|
|
819
|
+
|
|
820
|
+
# ---------------------------------------------------------------------------
|
|
821
|
+
# Receiver ALLOWLIST — positive matching for HTTP client objects.
|
|
822
|
+
#
|
|
823
|
+
# Instance-suffix matching (`session.get(url)`) only fires when the
|
|
824
|
+
# receiver's base name appears in this set. This prevents false positives
|
|
825
|
+
# from unrelated `.get()` calls (e.g. `dict.get`, `plugins.get`,
|
|
826
|
+
# `instance.get`, `headers.get`).
|
|
827
|
+
# ---------------------------------------------------------------------------
|
|
828
|
+
_HTTP_CLIENT_RECEIVER_NAMES: set[str] = {
    # Library module names, so module-level calls made via an alias count
    *("requests", "httpx", "aiohttp", "urllib", "urllib3"),
    # Variable names commonly given to HTTP client objects
    *("session", "http_session", "http", "http_client"),
    *("client", "api_client", "rest_client", "web_client"),
    *("conn", "connection", "transport"),
    *("resp", "response"),
}
|
|
850
|
+
|
|
851
|
+
# Suffix patterns: a receiver whose last dotted segment ends with one of
|
|
852
|
+
# these is treated as an HTTP client (e.g. ``self.session``,
|
|
853
|
+
# ``self._http_client``, ``cls.client``).
|
|
854
|
+
_HTTP_CLIENT_RECEIVER_SUFFIXES: tuple[str, ...] = (
    # Object-role endings (``self.session``, ``self._http_client``, ...)
    ("session", "client", "http", "http_client", "transport", "connection", "conn")
    # Library-name endings
    + ("requests", "httpx", "aiohttp")
)
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
# ============================================================================
|
|
869
|
+
# Usage location tracking
|
|
870
|
+
# ============================================================================
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
@dataclass
class UsageLocation:
    """One call site at which a detected integration is used.

    Attributes:
        file: Path of the source file, as a string.
        line: Line number of the call.
        function: Full name of the enclosing function, or ``None`` when the
            call site has no caller function recorded.
        call_expression: Callee name of the matching call.
    """

    file: str
    line: int
    function: str | None
    call_expression: str
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
@dataclass
class ExtractedTarget:
    """An outbound HTTP destination read from a call site's arguments.

    Stands for a URL/hostname the application connects to at runtime.

    Attributes:
        base_url: The target itself; required.
        path_pattern: Path portion of the target, empty by default.
        http_method: HTTP verb of the call, empty when it is not known.
        is_literal: ``True`` when the URL came from a string literal,
            ``False`` when it is an expression captured as text.
        file: Path of the source file containing the call.
        line: Line number of the call.
        function: Full name of the enclosing function, if any.
    """

    base_url: str
    path_pattern: str = ""
    http_method: str = ""
    is_literal: bool = True
    file: str = ""
    line: int = 0
    function: str | None = None
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
@dataclass
class DetectedIntegration:
    """Everything the detector learned about a single integration.

    Attributes:
        integration_type: Category of the integration (e.g. ``"http_client"``).
        name: Display name of the integration.
        detection_methods: How it was found, e.g. ``"import"``, ``"sdk_usage"``.
        locations: Call sites where the integration is used.
        metadata: Free-form extra data (the SDK phase stores a ``"services"``
            list here).
        sub_services: Display names of concrete services derived from SDK
            init calls.
        targets: Outbound HTTP targets attributed to this integration.

    Every collection field gets its own fresh container per instance.
    """

    integration_type: str
    name: str
    detection_methods: list[str] = field(default_factory=list)
    locations: list[UsageLocation] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
    sub_services: list[str] = field(default_factory=list)
    targets: list[ExtractedTarget] = field(default_factory=list)
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
# ============================================================================
|
|
914
|
+
# IntegrationDetector
|
|
915
|
+
# ============================================================================
|
|
916
|
+
|
|
917
|
+
|
|
918
|
+
class IntegrationDetector:
|
|
919
|
+
"""
|
|
920
|
+
Detects external integrations through import analysis, call-site
|
|
921
|
+
scanning, and SDK usage-pattern extraction.
|
|
922
|
+
"""
|
|
923
|
+
|
|
924
|
+
def __init__(self, parsed_files: list[ParsedFile]) -> None:
    """Prepare a detector over the files that were parsed successfully.

    Args:
        parsed_files: Parse results; entries whose ``success`` flag is
            falsy are dropped.
    """
    usable = []
    for parsed in parsed_files:
        if parsed.success:
            usable.append(parsed)
    self._parsed_files = usable
    # The module-level catalogs are referenced directly, not copied.
    self._catalog = _LIBRARY_CATALOG
    self._sdk_patterns = _SDK_PATTERNS
|
|
928
|
+
|
|
929
|
+
def detect(self) -> list[DetectedIntegration]:
|
|
930
|
+
"""Run full detection pipeline and return results."""
|
|
931
|
+
results: dict[str, DetectedIntegration] = {}
|
|
932
|
+
|
|
933
|
+
# Phase 1: import-based detection
|
|
934
|
+
self._detect_from_imports(results)
|
|
935
|
+
|
|
936
|
+
# Phase 2: usage-location tracking (call-site scan)
|
|
937
|
+
self._track_usage_locations(results)
|
|
938
|
+
|
|
939
|
+
# Phase 3: SDK usage-pattern extraction
|
|
940
|
+
self._extract_sdk_services(results)
|
|
941
|
+
|
|
942
|
+
# Phase 4: HTTP target extraction (URLs/hostnames from call args)
|
|
943
|
+
self._extract_http_targets(results)
|
|
944
|
+
|
|
945
|
+
return list(results.values())
|
|
946
|
+
|
|
947
|
+
# ------------------------------------------------------------------
|
|
948
|
+
# Phase 1: import scanning
|
|
949
|
+
# ------------------------------------------------------------------
|
|
950
|
+
|
|
951
|
+
def _detect_from_imports(self, results: dict[str, DetectedIntegration]) -> None:
|
|
952
|
+
for pf in self._parsed_files:
|
|
953
|
+
for imp in pf.imports:
|
|
954
|
+
full_module = imp.module or ""
|
|
955
|
+
top_module = full_module.split(".")[0]
|
|
956
|
+
for entry in self._catalog:
|
|
957
|
+
pat = entry.import_pattern
|
|
958
|
+
if pat == top_module or pat in full_module or full_module.startswith(pat):
|
|
959
|
+
key = entry.integration_name
|
|
960
|
+
if key not in results:
|
|
961
|
+
results[key] = DetectedIntegration(
|
|
962
|
+
integration_type=entry.integration_type,
|
|
963
|
+
name=entry.integration_name,
|
|
964
|
+
detection_methods=["import"],
|
|
965
|
+
)
|
|
966
|
+
elif "import" not in results[key].detection_methods:
|
|
967
|
+
results[key].detection_methods.append("import")
|
|
968
|
+
|
|
969
|
+
# ------------------------------------------------------------------
|
|
970
|
+
# Phase 2: usage-location tracking
|
|
971
|
+
# ------------------------------------------------------------------
|
|
972
|
+
|
|
973
|
+
def _track_usage_locations(self, results: dict[str, DetectedIntegration]) -> None:
|
|
974
|
+
"""For each detected integration, find call sites that use it."""
|
|
975
|
+
if not results:
|
|
976
|
+
return
|
|
977
|
+
|
|
978
|
+
prefix_to_integration: dict[str, str] = {}
|
|
979
|
+
for entry in self._catalog:
|
|
980
|
+
if entry.integration_name in results:
|
|
981
|
+
for prefix in entry.call_prefixes:
|
|
982
|
+
prefix_to_integration[prefix] = entry.integration_name
|
|
983
|
+
|
|
984
|
+
if not prefix_to_integration:
|
|
985
|
+
return
|
|
986
|
+
|
|
987
|
+
for pf in self._parsed_files:
|
|
988
|
+
file_str = str(pf.path)
|
|
989
|
+
for cs in pf.call_sites:
|
|
990
|
+
callee = cs.callee_name
|
|
991
|
+
for prefix, int_name in prefix_to_integration.items():
|
|
992
|
+
if prefix in callee:
|
|
993
|
+
det = results[int_name]
|
|
994
|
+
if "sdk_usage" not in det.detection_methods:
|
|
995
|
+
det.detection_methods.append("sdk_usage")
|
|
996
|
+
caller = cs.caller_function.full if cs.caller_function else None
|
|
997
|
+
det.locations.append(
|
|
998
|
+
UsageLocation(
|
|
999
|
+
file=file_str,
|
|
1000
|
+
line=cs.location.line,
|
|
1001
|
+
function=caller,
|
|
1002
|
+
call_expression=callee,
|
|
1003
|
+
)
|
|
1004
|
+
)
|
|
1005
|
+
break # one match per call site is enough
|
|
1006
|
+
|
|
1007
|
+
# ------------------------------------------------------------------
|
|
1008
|
+
# Phase 3: SDK usage-pattern extraction
|
|
1009
|
+
# ------------------------------------------------------------------
|
|
1010
|
+
|
|
1011
|
+
def _extract_sdk_services(self, results: dict[str, DetectedIntegration]) -> None:
|
|
1012
|
+
"""Extract service-specific info from SDK init calls."""
|
|
1013
|
+
patterns_by_lib: dict[str, list[SdkUsagePattern]] = {}
|
|
1014
|
+
for sp in self._sdk_patterns:
|
|
1015
|
+
patterns_by_lib.setdefault(sp.library, []).append(sp)
|
|
1016
|
+
|
|
1017
|
+
relevant_libs = {sp.library for sp in self._sdk_patterns} & {
|
|
1018
|
+
e.import_pattern for e in self._catalog if e.integration_name in results
|
|
1019
|
+
}
|
|
1020
|
+
if not relevant_libs:
|
|
1021
|
+
return
|
|
1022
|
+
|
|
1023
|
+
for pf in self._parsed_files:
|
|
1024
|
+
for cs in pf.call_sites:
|
|
1025
|
+
callee = cs.callee_name
|
|
1026
|
+
for lib in relevant_libs:
|
|
1027
|
+
for sp in patterns_by_lib.get(lib, []):
|
|
1028
|
+
if sp.call_pattern not in callee:
|
|
1029
|
+
continue
|
|
1030
|
+
service_name = self._extract_service_arg(cs, sp)
|
|
1031
|
+
if not service_name:
|
|
1032
|
+
continue
|
|
1033
|
+
parent_name = next(
|
|
1034
|
+
(e.integration_name for e in self._catalog if e.import_pattern == lib),
|
|
1035
|
+
lib,
|
|
1036
|
+
)
|
|
1037
|
+
if parent_name not in results:
|
|
1038
|
+
continue
|
|
1039
|
+
det = results[parent_name]
|
|
1040
|
+
display = (
|
|
1041
|
+
f"{sp.result_type_prefix} {service_name}"
|
|
1042
|
+
if sp.result_type_prefix
|
|
1043
|
+
else service_name
|
|
1044
|
+
)
|
|
1045
|
+
if display not in det.sub_services:
|
|
1046
|
+
det.sub_services.append(display)
|
|
1047
|
+
det.metadata.setdefault("services", [])
|
|
1048
|
+
if service_name not in det.metadata["services"]:
|
|
1049
|
+
det.metadata["services"].append(service_name)
|
|
1050
|
+
|
|
1051
|
+
@staticmethod
|
|
1052
|
+
def _extract_service_arg(
|
|
1053
|
+
call_site: ParsedCallSite, # noqa: F821
|
|
1054
|
+
pattern: SdkUsagePattern,
|
|
1055
|
+
) -> str | None:
|
|
1056
|
+
"""Extract the service name from a call site's arguments."""
|
|
1057
|
+
|
|
1058
|
+
for arg in call_site.arguments:
|
|
1059
|
+
# Match by keyword
|
|
1060
|
+
if pattern.service_arg_keyword and arg.name == pattern.service_arg_keyword:
|
|
1061
|
+
if arg.is_literal and isinstance(arg.literal_value, str):
|
|
1062
|
+
return arg.literal_value
|
|
1063
|
+
# Match by position
|
|
1064
|
+
if (
|
|
1065
|
+
pattern.service_arg_position is not None
|
|
1066
|
+
and arg.position == pattern.service_arg_position
|
|
1067
|
+
):
|
|
1068
|
+
if arg.is_literal and isinstance(arg.literal_value, str):
|
|
1069
|
+
return arg.literal_value
|
|
1070
|
+
return None
|
|
1071
|
+
|
|
1072
|
+
# ------------------------------------------------------------------
|
|
1073
|
+
# Phase 4: HTTP target extraction
|
|
1074
|
+
# ------------------------------------------------------------------
|
|
1075
|
+
|
|
1076
|
+
def _extract_http_targets(self, results: dict[str, DetectedIntegration]) -> None:
|
|
1077
|
+
"""
|
|
1078
|
+
For every detected ``http_client`` integration, scan call sites to
|
|
1079
|
+
extract the target URL/hostname from the first positional argument
|
|
1080
|
+
(or ``url=`` keyword).
|
|
1081
|
+
|
|
1082
|
+
Populates ``DetectedIntegration.targets`` with :class:`ExtractedTarget`
|
|
1083
|
+
instances. Non-literal arguments (variables, f-strings) are recorded
|
|
1084
|
+
with ``is_literal=False`` and the raw expression stored in
|
|
1085
|
+
``base_url`` for downstream analysis.
|
|
1086
|
+
"""
|
|
1087
|
+
http_integrations = {
|
|
1088
|
+
name: det for name, det in results.items() if det.integration_type == "http_client"
|
|
1089
|
+
}
|
|
1090
|
+
if not http_integrations:
|
|
1091
|
+
return
|
|
1092
|
+
|
|
1093
|
+
http_lib_names = set(http_integrations.keys())
|
|
1094
|
+
files_with_http = self._files_importing_http_client(http_lib_names)
|
|
1095
|
+
|
|
1096
|
+
for pf in self._parsed_files:
|
|
1097
|
+
file_str = str(pf.path)
|
|
1098
|
+
file_has_http = file_str in files_with_http or pf.path in files_with_http
|
|
1099
|
+
|
|
1100
|
+
for cs in pf.call_sites:
|
|
1101
|
+
callee = cs.callee_name
|
|
1102
|
+
|
|
1103
|
+
target = self._try_named_pattern(cs, callee, file_str)
|
|
1104
|
+
|
|
1105
|
+
if target is None and file_has_http:
|
|
1106
|
+
target = self._try_instance_suffix(cs, callee, file_str)
|
|
1107
|
+
|
|
1108
|
+
if target is None:
|
|
1109
|
+
continue
|
|
1110
|
+
|
|
1111
|
+
parent = self._resolve_parent_integration(callee, http_integrations)
|
|
1112
|
+
if parent is None and file_has_http:
|
|
1113
|
+
parent = next(iter(http_integrations.values()), None)
|
|
1114
|
+
if parent is None:
|
|
1115
|
+
continue
|
|
1116
|
+
|
|
1117
|
+
if not any(
|
|
1118
|
+
t.base_url == target.base_url
|
|
1119
|
+
and t.path_pattern == target.path_pattern
|
|
1120
|
+
and t.http_method == target.http_method
|
|
1121
|
+
for t in parent.targets
|
|
1122
|
+
):
|
|
1123
|
+
parent.targets.append(target)
|
|
1124
|
+
|
|
1125
|
+
def _files_importing_http_client(self, http_lib_names: set[str]) -> set[str]:
|
|
1126
|
+
"""Return file paths that import any known HTTP client library."""
|
|
1127
|
+
lib_patterns = {
|
|
1128
|
+
e.import_pattern
|
|
1129
|
+
for e in self._catalog
|
|
1130
|
+
if e.integration_type == "http_client" and e.integration_name in http_lib_names
|
|
1131
|
+
}
|
|
1132
|
+
result: set[str] = set()
|
|
1133
|
+
for pf in self._parsed_files:
|
|
1134
|
+
for imp in pf.imports:
|
|
1135
|
+
mod = imp.module or ""
|
|
1136
|
+
top = mod.split(".")[0]
|
|
1137
|
+
if any(pat == top or pat in mod or mod.startswith(pat) for pat in lib_patterns):
|
|
1138
|
+
result.add(str(pf.path))
|
|
1139
|
+
break
|
|
1140
|
+
return result
|
|
1141
|
+
|
|
1142
|
+
def _try_named_pattern(
    self,
    cs: ParsedCallSite,  # noqa: F821
    callee: str,
    file_str: str,
) -> ExtractedTarget | None:
    """Match a module-qualified call such as ``requests.get`` and build its target.

    Patterns are tried in table order.  A pattern whose text occurs in the
    callee but whose URL argument cannot be extracted is skipped, so a
    later pattern may still match.

    Args:
        cs: Call site being examined.
        callee: Callee name of ``cs``.
        file_str: Path of the file containing the call, as a string.

    Returns:
        The target for the first usable pattern, or ``None``.
    """
    applicable = (known for known in _HTTP_TARGET_PATTERNS if known.callee_contains in callee)
    for known in applicable:
        url_info = self._extract_url_arg(cs, known.url_arg_position, known.url_keyword)
        if url_info is not None:
            verb = known.inferred_method
            # ``request(method, url)`` style: the verb is the first argument.
            if known.url_arg_position == 1 and not verb:
                verb = self._infer_method_from_arg(cs, 0)
            return self._build_target(url_info, verb, cs, file_str)
    return None
|
|
1160
|
+
|
|
1161
|
+
def _try_instance_suffix(
    self,
    cs: ParsedCallSite,  # noqa: F821
    callee: str,
    file_str: str,
) -> ExtractedTarget | None:
    """Match instance-method calls such as ``session.get(url)``.

    False positives are limited by four checks:

    1. Receiver blocklist — ``app.get``, ``router.post`` and the like are
       skipped.
    2. Receiver allowlist — the receiver has to look like an HTTP client
       object (``session``, ``client``, ``self.http_client`` ...).
    3. URL plausibility — a literal argument has to look like a URL or an
       API path.
    4. File-level HTTP import gating — enforced by the caller, not here.

    Args:
        cs: Call site being examined.
        callee: Callee name of ``cs``.
        file_str: Path of the file containing the call, as a string.

    Returns:
        The extracted target, or ``None`` when no suffix applies or a check
        rejects the call.
    """
    for suffix, verb in _HTTP_INSTANCE_SUFFIXES.items():
        receiver = callee[: -len(suffix)] if callee.endswith(suffix) else ""
        # Empty means the suffix did not match or nothing precedes it; a
        # trailing dot means the receiver expression is malformed.
        if receiver == "" or receiver[-1] == ".":
            continue

        looks_like_client = not self._is_blocked_receiver(
            receiver
        ) and self._is_http_client_receiver(receiver)
        if not looks_like_client:
            continue

        generic_request = suffix == ".request"
        # ``x.request(method, url)`` carries the URL second; verb methods first.
        url_info = self._extract_url_arg(cs, 1 if generic_request else 0, "url")
        if url_info is None:
            continue

        # Only literals can be judged; expressions are accepted as they are.
        if url_info["is_literal"] and not self._is_plausible_url(url_info["value"]):
            continue

        if generic_request and not verb:
            verb = self._infer_method_from_arg(cs, 0)
        return self._build_target(url_info, verb, cs, file_str)
    return None
|
|
1202
|
+
|
|
1203
|
+
@staticmethod
def _is_blocked_receiver(receiver: str) -> bool:
    """Tell whether a receiver looks like a framework app/router object.

    The comparison is case-insensitive.  A receiver is blocked when it is
    listed in the exact blocklist, or when it starts or ends with one of
    the blocklist patterns.

    Args:
        receiver: Text in front of the method suffix, e.g. ``"self.router"``.
    """
    lowered = receiver.lower()
    return lowered in _RECEIVER_BLOCKLIST or any(
        lowered.startswith(fragment) or lowered.endswith(fragment)
        for fragment in _RECEIVER_BLOCKLIST_PATTERNS
    )
|
|
1213
|
+
|
|
1214
|
+
@staticmethod
def _is_http_client_receiver(receiver: str) -> bool:
    """Tell whether a receiver looks like an HTTP client object.

    Positive matching on the receiver's base name — its last dotted
    segment, with leading underscores removed and lower-cased.  The base
    name has to be a known HTTP client name or end with a known suffix.

    Accepted, for example:
        ``session``, ``self.session``, ``self._http_client``,
        ``cls.client``, ``api_client``

    Rejected, for example:
        ``plugins``, ``db``, ``cache``, ``instance.instance``,
        ``headers``, ``dict``

    Args:
        receiver: Text in front of the method suffix.
    """
    _, _, last_segment = receiver.rpartition(".")
    base = last_segment.lstrip("_").lower()

    if base in _HTTP_CLIENT_RECEIVER_NAMES:
        return True
    for ending in _HTTP_CLIENT_RECEIVER_SUFFIXES:
        if base.endswith(ending):
            return True
    return False
|
|
1236
|
+
|
|
1237
|
+
@staticmethod
|
|
1238
|
+
def _is_plausible_url(value: str) -> bool:
|
|
1239
|
+
"""Return True if a literal string looks like a URL or API path.
|
|
1240
|
+
|
|
1241
|
+
Rejects short dict-key-like strings (``"in"``, ``"value"``,
|
|
1242
|
+
``"required"``) that get captured by ``dict.get()`` matching.
|
|
1243
|
+
"""
|
|
1244
|
+
if "://" in value:
|
|
1245
|
+
return True
|
|
1246
|
+
if value.startswith("//"):
|
|
1247
|
+
return True
|
|
1248
|
+
if value.startswith("/") and len(value) > 1:
|
|
1249
|
+
return True
|
|
1250
|
+
return bool("." in value and len(value) > 4)
|
|
1251
|
+
|
|
1252
|
+
@staticmethod
def _extract_url_arg(
    cs: ParsedCallSite,  # noqa: F821
    position: int,
    keyword: str,
) -> dict[str, Any] | None:
    """
    Locate the URL argument of a call site and describe it.

    Arguments whose ``name`` equals *keyword* are tried first, in the
    order they appear in ``cs.arguments``; if none of them yields a
    usable value, unnamed arguments whose ``position`` equals *position*
    are tried next.

    For a candidate argument, a string literal is returned as
    ``{"value": <literal>, "is_literal": True}``.  Otherwise its
    ``expression_text`` (or, failing that, ``variable_name``) is returned
    as ``{"value": <text>, "is_literal": False}`` when non-empty.  A
    candidate that offers neither is skipped.

    Returns ``None`` when no candidate produces a value.
    """

    def describe(candidate):
        # Literal strings win over the textual form of the expression.
        if candidate.is_literal and isinstance(candidate.literal_value, str):
            return {"value": candidate.literal_value, "is_literal": True}
        text = candidate.expression_text or candidate.variable_name or ""
        if not text:
            return None
        return {"value": text, "is_literal": False}

    # Lazy passes over the arguments: keyword matches take priority, the
    # positional pass only runs when the keyword pass found nothing.
    by_keyword = (a for a in cs.arguments if a.name == keyword)
    by_position = (
        a for a in cs.arguments if a.position == position and a.name is None
    )
    for candidates in (by_keyword, by_position):
        for candidate in candidates:
            described = describe(candidate)
            if described is not None:
                return described

    return None
|
|
1280
|
+
|
|
1281
|
+
@staticmethod
def _infer_method_from_arg(cs: ParsedCallSite, position: int) -> str:  # noqa: F821
    """Derive the HTTP method from the string literal passed at *position*.

    Scans ``cs.arguments`` in order and returns the upper-cased value of
    the first argument whose ``position`` equals *position* and whose
    value is a string literal.  Returns ``""`` when there is no such
    argument (missing, non-literal, or a non-string literal).
    """
    literal_methods = (
        arg.literal_value.upper()
        for arg in cs.arguments
        if arg.position == position
        and arg.is_literal
        and isinstance(arg.literal_value, str)
    )
    return next(literal_methods, "")
|
|
1288
|
+
|
|
1289
|
+
@staticmethod
def _build_target(
    url_info: dict[str, Any],
    http_method: str,
    cs: ParsedCallSite,  # noqa: F821
    file_str: str,
) -> ExtractedTarget:
    """Build an ``ExtractedTarget`` for one outbound HTTP call site.

    Args:
        url_info: Mapping with ``"value"`` (the URL text or expression)
            and ``"is_literal"`` (whether that text is a string literal),
            as produced by ``_extract_url_arg``.
        http_method: HTTP verb for the call; upper-cased in the result,
            or ``""`` when empty.
        cs: The call site; supplies ``location.line`` and, when present,
            ``caller_function.full``.
        file_str: File path recorded on the target.

    Returns:
        The target.  For literal URLs that carry a scheme (``"://"``) or
        are scheme-relative (``"//..."``) and have a parseable host,
        ``base_url`` is ``scheme://host[:port]`` and ``path_pattern`` is
        the URL path.  In every other case ``base_url`` is the raw value
        and ``path_pattern`` is ``""``.
    """
    raw_url: str = url_info["value"]
    is_literal: bool = url_info["is_literal"]
    caller = cs.caller_function.full if cs.caller_function else None

    base_url = raw_url
    path_pattern = ""

    if is_literal and ("://" in raw_url or raw_url.startswith("//")):
        from urllib.parse import urlparse

        try:
            parsed = urlparse(raw_url)
            host = parsed.hostname or ""
            port = parsed.port
        except ValueError:
            # urlparse() rejects e.g. unbalanced IPv6 brackets, and
            # ``.port`` raises for non-numeric or out-of-range ports such
            # as the template literal "http://host:{port}/api".  Scanned
            # source may contain any string, so treat these as malformed
            # and keep the raw URL instead of aborting detection.
            host = ""
            port = None

        if host:
            # Scheme-relative URLs ("//host/path") default to https.
            scheme = parsed.scheme or "https"
            base_url = f"{scheme}://{host}"
            # NOTE(review): 80 and 443 are both dropped regardless of
            # scheme; kept as-is to preserve existing target values.
            if port and port not in (80, 443):
                base_url += f":{port}"
            path_pattern = parsed.path or ""
        # else: keep raw_url as base_url (e.g. malformed but still useful)

    return ExtractedTarget(
        base_url=base_url,
        path_pattern=path_pattern,
        http_method=http_method.upper() if http_method else "",
        is_literal=is_literal,
        file=file_str,
        line=cs.location.line,
        function=caller,
    )
|
|
1326
|
+
|
|
1327
|
+
@staticmethod
def _resolve_parent_integration(
    callee: str,
    http_integrations: dict[str, DetectedIntegration],
) -> DetectedIntegration | None:
    """Find the http_client integration a callee belongs to.

    Returns the first value of *http_integrations* (in the mapping's
    iteration order) whose key occurs, case-insensitively, as a substring
    of *callee*.  Returns ``None`` when no key matches.
    """
    haystack = callee.lower()
    matching = (
        integration
        for key, integration in http_integrations.items()
        if key.lower() in haystack
    )
    return next(matching, None)
|
|
1338
|
+
|
|
1339
|
+
# ------------------------------------------------------------------
|
|
1340
|
+
# Conversion to manifest models
|
|
1341
|
+
# ------------------------------------------------------------------
|
|
1342
|
+
|
|
1343
|
+
def to_manifest_models(
    self, detections: list[DetectedIntegration] | None = None
) -> list[IntegrationModel]:
    """Translate detections into manifest ``IntegrationModel`` objects.

    When *detections* is ``None`` the result of ``self.detect()`` is
    converted instead.

    For each detection:

    * at most 50 locations and 100 targets are emitted, to keep the
      manifest from growing without bound;
    * if sub-services are present, the full list is stored under
      ``metadata["sub_services"]`` and the first five are appended to the
      display name; entries of ``det.metadata`` are merged in afterwards
      and therefore win on key clashes;
    * a target gets a ``called_from`` location only when it has a file;
    * confidence is ``"HIGH"`` when more than one detection method
      contributed and ``"MEDIUM"`` otherwise.
    """
    from ..core.manifest import IntegrationModel, IntegrationTargetModel, LocationModel

    source = self.detect() if detections is None else detections

    result: list[IntegrationModel] = []
    for det in source:
        # cap to avoid huge manifests
        location_models = []
        for loc in det.locations[:50]:
            location_models.append(LocationModel(file=loc.file, line=loc.line))

        extra: dict[str, Any] = {}
        if det.sub_services:
            extra["sub_services"] = det.sub_services
        if det.metadata:
            extra.update(det.metadata)

        display_name = (
            f"{det.name} ({', '.join(det.sub_services[:5])})"
            if det.sub_services
            else det.name
        )

        # cap to avoid huge manifests
        target_models = []
        for target in det.targets[:100]:
            origin = None
            if target.file:
                origin = LocationModel(
                    file=target.file,
                    line=target.line,
                    function=target.function,
                )
            target_models.append(
                IntegrationTargetModel(
                    base_url=target.base_url,
                    path_pattern=target.path_pattern,
                    http_method=target.http_method,
                    is_literal=target.is_literal,
                    called_from=origin,
                )
            )

        # The id is derived from the bare detection name, not the
        # display name that carries the sub-service suffix.
        model = IntegrationModel(
            id=stable_id("int", det.integration_type, det.name),
            type=det.integration_type,
            name=display_name,
            locations=location_models,
            detection_method=", ".join(det.detection_methods),
            confidence="HIGH" if len(det.detection_methods) > 1 else "MEDIUM",
            targets=target_models,
            metadata=extra,
        )
        result.append(model)

    return result
|