cartography-0.116.0-py3-none-any.whl → cartography-0.117.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- cartography/_version.py +2 -2
- cartography/client/core/tx.py +23 -2
- cartography/intel/aws/ecr_image_layers.py +174 -21
- cartography/intel/azure/__init__.py +16 -0
- cartography/intel/azure/container_instances.py +95 -0
- cartography/intel/azure/data_lake.py +124 -0
- cartography/intel/github/teams.py +3 -3
- cartography/models/aws/ecr/image.py +30 -1
- cartography/models/azure/container_instance.py +55 -0
- cartography/models/azure/data_lake_filesystem.py +51 -0
- cartography/rules/cli.py +8 -6
- cartography/rules/data/frameworks/mitre_attack/__init__.py +7 -1
- cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +317 -0
- cartography/rules/data/frameworks/mitre_attack/requirements/t1190_exploit_public_facing_application/__init__.py +1 -0
- cartography/rules/spec/model.py +13 -0
- {cartography-0.116.0.dist-info → cartography-0.117.0.dist-info}/METADATA +4 -2
- {cartography-0.116.0.dist-info → cartography-0.117.0.dist-info}/RECORD +21 -16
- {cartography-0.116.0.dist-info → cartography-0.117.0.dist-info}/WHEEL +0 -0
- {cartography-0.116.0.dist-info → cartography-0.117.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.116.0.dist-info → cartography-0.117.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.116.0.dist-info → cartography-0.117.0.dist-info}/top_level.txt +0 -0
cartography/_version.py
CHANGED

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.116.0'
-__version_tuple__ = version_tuple = (0, 116, 0)
+__version__ = version = '0.117.0'
+__version_tuple__ = version_tuple = (0, 117, 0)
 
 __commit_id__ = commit_id = None
cartography/client/core/tx.py
CHANGED

@@ -6,6 +6,7 @@ from typing import Optional
 from typing import Tuple
 from typing import Union
 
+import backoff
 import neo4j
 
 from cartography.graph.querybuilder import build_create_index_queries
@@ -14,11 +15,31 @@ from cartography.graph.querybuilder import build_ingestion_query
 from cartography.graph.querybuilder import build_matchlink_query
 from cartography.models.core.nodes import CartographyNodeSchema
 from cartography.models.core.relationships import CartographyRelSchema
+from cartography.util import backoff_handler
 from cartography.util import batch
 
 logger = logging.getLogger(__name__)
 
 
+@backoff.on_exception(  # type: ignore
+    backoff.expo,
+    (
+        ConnectionResetError,
+        neo4j.exceptions.ServiceUnavailable,
+        neo4j.exceptions.SessionExpired,
+        neo4j.exceptions.TransientError,
+    ),
+    max_tries=5,
+    on_backoff=backoff_handler,
+)
+def _run_index_query_with_retry(neo4j_session: neo4j.Session, query: str) -> None:
+    """
+    Execute an index creation query with retry logic.
+    Index creation requires autocommit transactions and can experience transient errors.
+    """
+    neo4j_session.run(query)
+
+
 def run_write_query(
     neo4j_session: neo4j.Session, query: str, **parameters: Any
 ) -> None:
@@ -269,7 +290,7 @@ def ensure_indexes(
         raise ValueError(
             'Query provided to `ensure_indexes()` does not start with "CREATE INDEX IF NOT EXISTS".',
         )
-    neo4j_session.run(query)
+    _run_index_query_with_retry(neo4j_session, query)
 
 
 def ensure_indexes_for_matchlinks(
@@ -288,7 +309,7 @@ def ensure_indexes_for_matchlinks(
         raise ValueError(
             'Query provided to `ensure_indexes_for_matchlinks()` does not start with "CREATE INDEX IF NOT EXISTS".',
        )
-    neo4j_session.run(query)
+    _run_index_query_with_retry(neo4j_session, query)
 
 
 def load(
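For readers unfamiliar with the backoff library used above: the decorator wraps a function so that the listed exceptions trigger exponential-backoff retries instead of failing immediately. A minimal standalone sketch of the same pattern follows; the function name, query, and connection details are illustrative assumptions, not part of cartography.

import backoff
import neo4j

@backoff.on_exception(
    backoff.expo,  # exponential delays between attempts: ~1s, 2s, 4s, ...
    (ConnectionResetError, neo4j.exceptions.ServiceUnavailable),
    max_tries=5,  # re-raise after the fifth failed attempt
)
def create_example_index(session: neo4j.Session) -> None:
    # Index creation runs as an autocommit query, so transient network or
    # cluster errors surface here rather than inside a managed transaction.
    session.run("CREATE INDEX IF NOT EXISTS FOR (n:Example) ON (n.id)")

driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
with driver.session() as session:
    create_example_index(session)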
cartography/intel/aws/ecr_image_layers.py
CHANGED

@@ -170,6 +170,111 @@ async def get_blob_json_via_presigned(
     return response.json()
 
 
+async def _extract_parent_image_from_attestation(
+    ecr_client: ECRClient,
+    repo_name: str,
+    attestation_manifest_digest: str,
+    http_client: httpx.AsyncClient,
+) -> Optional[dict[str, str]]:
+    """
+    Extract parent image information from an in-toto provenance attestation.
+
+    This function fetches an attestation manifest, downloads its in-toto layer,
+    and extracts the parent image reference from the SLSA provenance materials.
+
+    :param ecr_client: ECR client for fetching manifests and layers
+    :param repo_name: ECR repository name
+    :param attestation_manifest_digest: Digest of the attestation manifest
+    :param http_client: HTTP client for downloading blobs
+    :return: Dict with parent_image_uri and parent_image_digest, or None if no parent image found
+    """
+    try:
+        attestation_manifest, _ = await batch_get_manifest(
+            ecr_client,
+            repo_name,
+            attestation_manifest_digest,
+            [ECR_OCI_MANIFEST_MT, ECR_DOCKER_MANIFEST_MT],
+        )
+
+        if not attestation_manifest:
+            logger.debug(
+                "No attestation manifest found for digest %s in repo %s",
+                attestation_manifest_digest,
+                repo_name,
+            )
+            return None
+
+        # Get the in-toto layer from the attestation manifest
+        layers = attestation_manifest.get("layers", [])
+        intoto_layer = next(
+            (
+                layer
+                for layer in layers
+                if "in-toto" in layer.get("mediaType", "").lower()
+            ),
+            None,
+        )
+
+        if not intoto_layer:
+            logger.debug(
+                "No in-toto layer found in attestation manifest %s",
+                attestation_manifest_digest,
+            )
+            return None
+
+        # Download the in-toto attestation blob
+        intoto_digest = intoto_layer.get("digest")
+        if not intoto_digest:
+            logger.debug("No digest found for in-toto layer")
+            return None
+
+        attestation_blob = await get_blob_json_via_presigned(
+            ecr_client,
+            repo_name,
+            intoto_digest,
+            http_client,
+        )
+
+        if not attestation_blob:
+            logger.debug("Failed to download attestation blob")
+            return None
+
+        # Extract parent image from SLSA provenance materials
+        materials = attestation_blob.get("predicate", {}).get("materials", [])
+        for material in materials:
+            uri = material.get("uri", "")
+            uri_l = uri.lower()
+            # Look for container image URIs that are NOT the dockerfile itself
+            is_container_ref = (
+                uri_l.startswith("pkg:docker/")
+                or uri_l.startswith("pkg:oci/")
+                or uri_l.startswith("oci://")
+            )
+            if is_container_ref and "dockerfile" not in uri_l:
+                digest_obj = material.get("digest", {})
+                sha256_digest = digest_obj.get("sha256")
+                if sha256_digest:
+                    return {
+                        "parent_image_uri": uri,
+                        "parent_image_digest": f"sha256:{sha256_digest}",
+                    }
+
+        logger.debug(
+            "No parent image found in attestation materials for %s",
+            attestation_manifest_digest,
+        )
+        return None
+
+    except Exception as e:
+        logger.warning(
+            "Error extracting parent image from attestation %s in repo %s: %s",
+            attestation_manifest_digest,
+            repo_name,
+            e,
+        )
+        return None
+
+
 async def _diff_ids_for_manifest(
     ecr_client: ECRClient,
     repo_name: str,
@@ -228,6 +333,7 @@ async def _diff_ids_for_manifest(
 def transform_ecr_image_layers(
     image_layers_data: dict[str, dict[str, list[str]]],
     image_digest_map: dict[str, str],
+    image_attestation_map: Optional[dict[str, dict[str, str]]] = None,
 ) -> tuple[list[dict], list[dict]]:
     """
     Transform image layer data into format suitable for Neo4j ingestion.
@@ -235,8 +341,11 @@ def transform_ecr_image_layers(
 
     :param image_layers_data: Map of image URI to platform to diff_ids
     :param image_digest_map: Map of image URI to image digest
+    :param image_attestation_map: Map of image URI to attestation data (parent_image_uri, parent_image_digest)
     :return: List of layer objects ready for ingestion
     """
+    if image_attestation_map is None:
+        image_attestation_map = {}
     layers_by_diff_id: dict[str, dict[str, Any]] = {}
     memberships_by_digest: dict[str, dict[str, Any]] = {}
 
@@ -278,10 +387,20 @@ def transform_ecr_image_layers(
                 layer["tail_image_ids"].add(image_digest)
 
         if ordered_layers_for_image:
-            memberships_by_digest[image_digest] = {
+            membership: dict[str, Any] = {
                 "layer_diff_ids": ordered_layers_for_image,
             }
 
+            # Add attestation data if available for this image
+            if image_uri in image_attestation_map:
+                attestation = image_attestation_map[image_uri]
+                membership["parent_image_uri"] = attestation["parent_image_uri"]
+                membership["parent_image_digest"] = attestation["parent_image_digest"]
+                membership["from_attestation"] = True
+                membership["confidence"] = "explicit"
+
+            memberships_by_digest[image_digest] = membership
+
         # Convert sets back to lists for Neo4j ingestion
         layers = []
         for layer in layers_by_diff_id.values():
@@ -350,12 +469,18 @@ async def fetch_image_layers_async(
     ecr_client: ECRClient,
     repo_images_list: list[dict],
     max_concurrent: int = 200,
-) -> tuple[dict[str, dict[str, list[str]]], dict[str, str]]:
+) -> tuple[dict[str, dict[str, list[str]]], dict[str, str], dict[str, dict[str, str]]]:
     """
     Fetch image layers for ECR images in parallel with caching and non-blocking I/O.
+
+    Returns:
+        - image_layers_data: Map of image URI to platform to diff_ids
+        - image_digest_map: Map of image URI to image digest
+        - image_attestation_map: Map of image URI to attestation data (parent_image_uri, parent_image_digest)
     """
     image_layers_data: dict[str, dict[str, list[str]]] = {}
     image_digest_map: dict[str, str] = {}
+    image_attestation_map: dict[str, dict[str, str]] = {}
     semaphore = asyncio.Semaphore(max_concurrent)
 
     # Cache for manifest fetches keyed by (repo_name, imageDigest)
@@ -402,8 +527,8 @@ async def fetch_image_layers_async(
     async def fetch_single_image_layers(
         repo_image: dict,
         http_client: httpx.AsyncClient,
-    ) -> Optional[tuple[str, str, dict[str, list[str]]]]:
-        """Fetch layers for a single image."""
+    ) -> Optional[tuple[str, str, dict[str, list[str]], Optional[dict[str, str]]]]:
+        """Fetch layers for a single image and extract attestation if present."""
         async with semaphore:
             # Caller guarantees these fields exist in every repo_image
             uri = repo_image["uri"]
@@ -426,24 +551,37 @@ async def fetch_image_layers_async(
 
             manifest_media_type = (media_type or doc.get("mediaType", "")).lower()
             platform_layers: dict[str, list[str]] = {}
+            attestation_data: Optional[dict[str, str]] = None
 
             if doc.get("manifests") and manifest_media_type in INDEX_MEDIA_TYPES_LOWER:
 
                 async def _process_child_manifest(
                     manifest_ref: dict,
-                ) -> dict[str, list[str]]:
-                    # Skip attestation manifests
+                ) -> tuple[dict[str, list[str]], Optional[dict[str, str]]]:
+                    # Check if this is an attestation manifest
                     if (
                         manifest_ref.get("annotations", {}).get(
                             "vnd.docker.reference.type"
                         )
                         == "attestation-manifest"
                     ):
-                        return {}
+                        # Extract base image from attestation
+                        child_digest = manifest_ref.get("digest")
+                        if child_digest:
+                            attestation_info = (
+                                await _extract_parent_image_from_attestation(
+                                    ecr_client,
+                                    repo_name,
+                                    child_digest,
+                                    http_client,
+                                )
+                            )
+                            return {}, attestation_info
+                        return {}, None
 
                     child_digest = manifest_ref.get("digest")
                     if not child_digest:
-                        return {}
+                        return {}, None
 
                     # Use optimized caching for child manifest
                     child_doc, _ = await _fetch_and_cache_manifest(
@@ -452,16 +590,17 @@ async def fetch_image_layers_async(
                         [ECR_OCI_MANIFEST_MT, ECR_DOCKER_MANIFEST_MT],
                     )
                     if not child_doc:
-                        return {}
+                        return {}, None
 
                     platform_hint = extract_platform_from_manifest(manifest_ref)
-                    return await _diff_ids_for_manifest(
+                    diff_map = await _diff_ids_for_manifest(
                         ecr_client,
                         repo_name,
                         child_doc,
                         http_client,
                         platform_hint,
                     )
+                    return diff_map, None
 
                 # Process all child manifests in parallel
                 child_tasks = [
@@ -474,8 +613,13 @@ async def fetch_image_layers_async(
 
                 # Merge results from successful child manifest processing
                 for result in child_results:
-                    if isinstance(result, dict):
-                        platform_layers.update(result)
+                    if isinstance(result, tuple) and len(result) == 2:
+                        layer_data, attest_data = result
+                        if layer_data:
+                            platform_layers.update(layer_data)
+                        if attest_data and not attestation_data:
+                            # Use first attestation found
+                            attestation_data = attest_data
             else:
                 diff_map = await _diff_ids_for_manifest(
                     ecr_client,
@@ -487,7 +631,7 @@ async def fetch_image_layers_async(
                 platform_layers.update(diff_map)
 
             if platform_layers:
-                return uri, digest, platform_layers
+                return uri, digest, platform_layers, attestation_data
 
             return None
 
@@ -507,7 +651,7 @@ async def fetch_image_layers_async(
         )
 
         if not tasks:
-            return image_layers_data, image_digest_map
+            return image_layers_data, image_digest_map, image_attestation_map
 
         progress_interval = max(1, min(100, total // 10 or 1))
         completed = 0
@@ -526,16 +670,22 @@ async def fetch_image_layers_async(
             )
 
             if result:
-                uri, digest, layer_data = result
+                uri, digest, layer_data, attestation_data = result
                 if not digest:
                     raise ValueError(f"Empty digest returned for image {uri}")
                 image_layers_data[uri] = layer_data
                 image_digest_map[uri] = digest
+                if attestation_data:
+                    image_attestation_map[uri] = attestation_data
 
     logger.info(
        f"Successfully fetched layers for {len(image_layers_data)}/{len(repo_images_list)} images"
    )
-    return image_layers_data, image_digest_map
+    if image_attestation_map:
+        logger.info(
+            f"Found attestations with base image info for {len(image_attestation_map)} images"
+        )
+    return image_layers_data, image_digest_map, image_attestation_map
 
 
 def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
@@ -613,9 +763,11 @@ def sync(
         f"Starting to fetch layers for {len(repo_images_list)} images..."
     )
 
-    async def _fetch_with_async_client() -> tuple[
-        dict[str, dict[str, list[str]]], dict[str, str]
-    ]:
+    async def _fetch_with_async_client() -> tuple[
+        dict[str, dict[str, list[str]]],
+        dict[str, str],
+        dict[str, dict[str, str]],
+    ]:
        # Use credentials from the existing boto3 session
        credentials = boto3_session.get_credentials()
        session = aioboto3.Session(
@@ -635,8 +787,8 @@ def sync(
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
 
-    image_layers_data, image_digest_map = loop.run_until_complete(
-        _fetch_with_async_client()
+    image_layers_data, image_digest_map, image_attestation_map = (
+        loop.run_until_complete(_fetch_with_async_client())
     )
 
    logger.info(
@@ -645,6 +797,7 @@ def sync(
    layers, memberships = transform_ecr_image_layers(
        image_layers_data,
        image_digest_map,
+        image_attestation_map,
    )
    load_ecr_image_layers(
        neo4j_session,
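To make the selection rule in _extract_parent_image_from_attestation concrete, here is a small self-contained sketch. The predicate below is invented sample data shaped like a SLSA provenance attestation; only the matching logic mirrors the code above.

# Illustrative only: a trimmed SLSA provenance predicate like the ones the
# attestation blob above contains. The URIs and digest are made up.
attestation_blob = {
    "predicate": {
        "materials": [
            # Build tooling, skipped because the URI contains "dockerfile":
            {"uri": "pkg:docker/docker/dockerfile@1.4"},
            # The base (parent) image the build started FROM:
            {
                "uri": "pkg:docker/library/python@3.12-slim",
                "digest": {"sha256": "abc123"},
            },
        ],
    },
}

# Same rule as the function above: the first container-image URI that is not
# the Dockerfile frontend, and that carries a sha256 digest, wins.
for material in attestation_blob["predicate"]["materials"]:
    uri = material.get("uri", "")
    uri_l = uri.lower()
    is_container_ref = uri_l.startswith(("pkg:docker/", "pkg:oci/", "oci://"))
    if is_container_ref and "dockerfile" not in uri_l:
        sha = material.get("digest", {}).get("sha256")
        if sha:
            print(uri, f"sha256:{sha}")  # pkg:docker/library/python@3.12-slim sha256:abc123
            break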
cartography/intel/azure/__init__.py
CHANGED

@@ -9,7 +9,9 @@ from cartography.util import timeit
 
 from . import app_service
 from . import compute
+from . import container_instances
 from . import cosmosdb
+from . import data_lake
 from . import functions
 from . import logic_apps
 from . import resource_groups
@@ -30,6 +32,13 @@ def _sync_one_subscription(
     update_tag: int,
     common_job_parameters: Dict,
 ) -> None:
+    container_instances.sync(
+        neo4j_session,
+        credentials,
+        subscription_id,
+        update_tag,
+        common_job_parameters,
+    )
     compute.sync(
         neo4j_session,
         credentials.credential,
@@ -86,6 +95,13 @@ def _sync_one_subscription(
         update_tag,
         common_job_parameters,
     )
+    data_lake.sync(
+        neo4j_session,
+        credentials,
+        subscription_id,
+        update_tag,
+        common_job_parameters,
+    )
 
 
 def _sync_tenant(
cartography/intel/azure/container_instances.py
ADDED

@@ -0,0 +1,95 @@
+import logging
+from typing import Any
+
+import neo4j
+from azure.core.exceptions import ClientAuthenticationError
+from azure.core.exceptions import HttpResponseError
+from azure.mgmt.containerinstance import ContainerInstanceManagementClient
+
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.azure.container_instance import AzureContainerInstanceSchema
+from cartography.util import timeit
+
+from .util.credentials import Credentials
+
+logger = logging.getLogger(__name__)
+
+
+@timeit
+def get_container_instances(
+    credentials: Credentials, subscription_id: str
+) -> list[dict]:
+    try:
+        client = ContainerInstanceManagementClient(
+            credentials.credential, subscription_id
+        )
+        # NOTE: Azure Container Instances are called "Container Groups" in the SDK
+        return [cg.as_dict() for cg in client.container_groups.list()]
+    except (ClientAuthenticationError, HttpResponseError) as e:
+        logger.warning(
+            f"Failed to get Container Instances for subscription {subscription_id}: {str(e)}"
+        )
+        return []
+
+
+def transform_container_instances(container_groups: list[dict]) -> list[dict]:
+    transformed_instances: list[dict[str, Any]] = []
+    for group in container_groups:
+        transformed_instance = {
+            "id": group.get("id"),
+            "name": group.get("name"),
+            "location": group.get("location"),
+            "type": group.get("type"),
+            "provisioning_state": group.get("properties", {}).get("provisioning_state"),
+            "ip_address": ((group.get("properties") or {}).get("ip_address") or {}).get(
+                "ip"
+            ),
+            "os_type": group.get("properties", {}).get("os_type"),
+        }
+        transformed_instances.append(transformed_instance)
+    return transformed_instances
+
+
+@timeit
+def load_container_instances(
+    neo4j_session: neo4j.Session,
+    data: list[dict[str, Any]],
+    subscription_id: str,
+    update_tag: int,
+) -> None:
+    load(
+        neo4j_session,
+        AzureContainerInstanceSchema(),
+        data,
+        lastupdated=update_tag,
+        AZURE_SUBSCRIPTION_ID=subscription_id,
+    )
+
+
+@timeit
+def cleanup_container_instances(
+    neo4j_session: neo4j.Session, common_job_parameters: dict
+) -> None:
+    GraphJob.from_node_schema(
+        AzureContainerInstanceSchema(), common_job_parameters
+    ).run(neo4j_session)
+
+
+@timeit
+def sync(
+    neo4j_session: neo4j.Session,
+    credentials: Credentials,
+    subscription_id: str,
+    update_tag: int,
+    common_job_parameters: dict,
+) -> None:
+    logger.info(
+        f"Syncing Azure Container Instances for subscription {subscription_id}."
+    )
+    raw_groups = get_container_instances(credentials, subscription_id)
+    transformed_groups = transform_container_instances(raw_groups)
+    load_container_instances(
+        neo4j_session, transformed_groups, subscription_id, update_tag
+    )
+    cleanup_container_instances(neo4j_session, common_job_parameters)
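A quick illustration of why transform_container_instances guards the ip_address lookup with `or {}` chains. The sample dict below is invented but shaped like the as_dict() output of container_groups.list(); real payloads carry many more fields, and "properties" may be absent entirely.

# Illustrative input only.
sample_group = {
    "id": "/subscriptions/sub-1/resourceGroups/rg-1/providers/Microsoft.ContainerInstance/containerGroups/demo",
    "name": "demo",
    "location": "eastus",
    "type": "Microsoft.ContainerInstance/containerGroups",
    "properties": {
        "provisioning_state": "Succeeded",
        "os_type": "Linux",
        "ip_address": None,  # groups without a public IP can return None here
    },
}

# The ((... or {}) ... or {}) chain keeps this lookup safe even when
# "properties" or "ip_address" is missing or None:
ip = ((sample_group.get("properties") or {}).get("ip_address") or {}).get("ip")
print(ip)  # None, instead of raising AttributeError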
cartography/intel/azure/data_lake.py
ADDED

@@ -0,0 +1,124 @@
+import logging
+from typing import Any
+
+import neo4j
+from azure.core.exceptions import ClientAuthenticationError
+from azure.core.exceptions import HttpResponseError
+from azure.mgmt.storage import StorageManagementClient
+
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.azure.data_lake_filesystem import AzureDataLakeFileSystemSchema
+from cartography.util import timeit
+
+from .util.credentials import Credentials
+
+logger = logging.getLogger(__name__)
+
+
+def _get_resource_group_from_id(resource_id: str) -> str:
+    """
+    Helper function to parse the resource group name from a full resource ID string.
+    """
+    parts = resource_id.lower().split("/")
+    rg_index = parts.index("resourcegroups")
+    return parts[rg_index + 1]
+
+
+@timeit
+def get_datalake_accounts(credentials: Credentials, subscription_id: str) -> list[dict]:
+    try:
+        client = StorageManagementClient(credentials.credential, subscription_id)
+        storage_accounts = [sa.as_dict() for sa in client.storage_accounts.list()]
+        return [sa for sa in storage_accounts if sa.get("is_hns_enabled")]
+    except (ClientAuthenticationError, HttpResponseError) as e:
+        logger.warning(f"Failed to get Storage Accounts for Data Lake sync: {str(e)}")
+        return []
+
+
+@timeit
+def get_filesystems_for_account(
+    client: StorageManagementClient,
+    account: dict,
+) -> list[dict]:
+    resource_group_name = _get_resource_group_from_id(account["id"])
+    try:
+        return [
+            c.as_dict()
+            for c in client.blob_containers.list(
+                resource_group_name,
+                account["name"],
+            )
+        ]
+    except (ClientAuthenticationError, HttpResponseError) as e:
+        logger.warning(
+            f"Failed to get containers for storage account {account['name']}: {str(e)}",
+        )
+        return []
+
+
+@timeit
+def transform_datalake_filesystems(filesystems_response: list[dict]) -> list[dict]:
+    transformed_filesystems: list[dict[str, Any]] = []
+    for fs in filesystems_response:
+        transformed_filesystem = {
+            "id": fs.get("id"),
+            "name": fs.get("name"),
+            "public_access": fs.get("properties", {}).get("public_access"),
+            "last_modified_time": fs.get("properties", {}).get("last_modified_time"),
+            "has_immutability_policy": fs.get("properties", {}).get(
+                "has_immutability_policy",
+            ),
+            "has_legal_hold": fs.get("properties", {}).get("has_legal_hold"),
+        }
+        transformed_filesystems.append(transformed_filesystem)
+    return transformed_filesystems
+
+
+@timeit
+def load_datalake_filesystems(
+    neo4j_session: neo4j.Session,
+    data: list[dict[str, Any]],
+    storage_account_id: str,
+    update_tag: int,
+) -> None:
+    load(
+        neo4j_session,
+        AzureDataLakeFileSystemSchema(),
+        data,
+        lastupdated=update_tag,
+        STORAGE_ACCOUNT_ID=storage_account_id,
+    )
+
+
+@timeit
+def sync(
+    neo4j_session: neo4j.Session,
+    credentials: Credentials,
+    subscription_id: str,
+    update_tag: int,
+    common_job_parameters: dict,
+) -> None:
+    logger.info(
+        f"Syncing Azure Data Lake File Systems for subscription {subscription_id}.",
+    )
+    client = StorageManagementClient(credentials.credential, subscription_id)
+
+    datalake_accounts = get_datalake_accounts(credentials, subscription_id)
+    for account in datalake_accounts:
+        account_id = account["id"]
+        raw_filesystems = get_filesystems_for_account(client, account)
+        transformed_filesystems = transform_datalake_filesystems(raw_filesystems)
+
+        load_datalake_filesystems(
+            neo4j_session,
+            transformed_filesystems,
+            account_id,
+            update_tag,
+        )
+
+        cleanup_params = common_job_parameters.copy()
+        cleanup_params["STORAGE_ACCOUNT_ID"] = account_id
+        GraphJob.from_node_schema(AzureDataLakeFileSystemSchema(), cleanup_params).run(
+            neo4j_session,
+        )
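For reference, _get_resource_group_from_id works purely by position within the resource ID path. A worked example with a made-up ID; note that the helper returns the lowercased name, since the whole ID is lowercased before splitting.

# Illustrative only: how the helper extracts the resource group segment.
resource_id = (
    "/subscriptions/00000000-0000-0000-0000-000000000000"
    "/resourceGroups/my-rg/providers/Microsoft.Storage/storageAccounts/mydatalake"
)
parts = resource_id.lower().split("/")
# Lowercasing makes the lookup case-insensitive ("resourceGroups" vs "resourcegroups").
rg_index = parts.index("resourcegroups")
print(parts[rg_index + 1])  # my-rg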
cartography/intel/github/teams.py
CHANGED

@@ -84,7 +84,7 @@ def _get_teams_repos_inner_func(
     repo_urls: list[str],
     repo_permissions: list[str],
 ) -> None:
-    logger.info(f"
+    logger.info(f"Retrieving team repos for {team_name}.")
     team_repos = _get_team_repos(org, api_url, token, team_name)
 
     # The `or []` is because `.nodes` can be None. See:
@@ -192,7 +192,7 @@ def _get_teams_users_inner_func(
     user_urls: List[str],
     user_roles: List[str],
 ) -> None:
-    logger.info(f"
+    logger.info(f"Retrieving team users for {team_name}.")
     team_users = _get_team_users(org, api_url, token, team_name)
     # The `or []` is because `.nodes` can be None. See:
     # https://docs.github.com/en/graphql/reference/objects#teammemberconnection
@@ -299,7 +299,7 @@ def _get_child_teams_inner_func(
     team_name: str,
     team_urls: List[str],
 ) -> None:
-    logger.info(f"
+    logger.info(f"Retrieving child teams for {team_name}.")
     child_teams = _get_child_teams(org, api_url, token, team_name)
     # The `or []` is because `.nodes` can be None. See:
     # https://docs.github.com/en/graphql/reference/objects#teammemberconnection