cartography 0.108.0rc1__py3-none-any.whl → 0.109.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- cartography/_version.py +2 -2
- cartography/cli.py +14 -0
- cartography/config.py +4 -0
- cartography/data/indexes.cypher +0 -17
- cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
- cartography/intel/aws/cloudtrail.py +17 -4
- cartography/intel/aws/cloudtrail_management_events.py +614 -16
- cartography/intel/aws/cloudwatch.py +73 -4
- cartography/intel/aws/ec2/subnets.py +37 -63
- cartography/intel/aws/ecr.py +55 -80
- cartography/intel/aws/elasticache.py +102 -79
- cartography/intel/aws/eventbridge.py +91 -0
- cartography/intel/aws/glue.py +117 -0
- cartography/intel/aws/identitycenter.py +71 -23
- cartography/intel/aws/kms.py +160 -200
- cartography/intel/aws/lambda_function.py +206 -190
- cartography/intel/aws/rds.py +243 -458
- cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
- cartography/intel/aws/resources.py +4 -0
- cartography/intel/aws/route53.py +334 -332
- cartography/intel/aws/secretsmanager.py +62 -44
- cartography/intel/entra/groups.py +29 -1
- cartography/intel/gcp/__init__.py +10 -0
- cartography/intel/gcp/compute.py +19 -42
- cartography/intel/trivy/__init__.py +73 -13
- cartography/intel/trivy/scanner.py +115 -92
- cartography/models/aws/cloudtrail/management_events.py +95 -6
- cartography/models/aws/cloudtrail/trail.py +21 -0
- cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
- cartography/models/aws/ec2/subnets.py +65 -0
- cartography/models/aws/ecr/__init__.py +0 -0
- cartography/models/aws/ecr/image.py +41 -0
- cartography/models/aws/ecr/repository.py +72 -0
- cartography/models/aws/ecr/repository_image.py +95 -0
- cartography/models/aws/elasticache/__init__.py +0 -0
- cartography/models/aws/elasticache/cluster.py +65 -0
- cartography/models/aws/elasticache/topic.py +67 -0
- cartography/models/aws/eventbridge/__init__.py +0 -0
- cartography/models/aws/eventbridge/rule.py +77 -0
- cartography/models/aws/glue/__init__.py +0 -0
- cartography/models/aws/glue/connection.py +51 -0
- cartography/models/aws/identitycenter/awspermissionset.py +44 -0
- cartography/models/aws/kms/__init__.py +0 -0
- cartography/models/aws/kms/aliases.py +86 -0
- cartography/models/aws/kms/grants.py +65 -0
- cartography/models/aws/kms/keys.py +88 -0
- cartography/models/aws/lambda_function/__init__.py +0 -0
- cartography/models/aws/lambda_function/alias.py +74 -0
- cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
- cartography/models/aws/lambda_function/lambda_function.py +89 -0
- cartography/models/aws/lambda_function/layer.py +72 -0
- cartography/models/aws/rds/__init__.py +0 -0
- cartography/models/aws/rds/cluster.py +89 -0
- cartography/models/aws/rds/instance.py +154 -0
- cartography/models/aws/rds/snapshot.py +108 -0
- cartography/models/aws/rds/subnet_group.py +101 -0
- cartography/models/aws/route53/__init__.py +0 -0
- cartography/models/aws/route53/dnsrecord.py +214 -0
- cartography/models/aws/route53/nameserver.py +63 -0
- cartography/models/aws/route53/subzone.py +40 -0
- cartography/models/aws/route53/zone.py +47 -0
- cartography/models/aws/secretsmanager/secret.py +106 -0
- cartography/models/entra/group.py +26 -0
- cartography/models/entra/user.py +6 -0
- cartography/models/gcp/compute/__init__.py +0 -0
- cartography/models/gcp/compute/vpc.py +50 -0
- cartography/util.py +8 -1
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/METADATA +2 -2
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/RECORD +73 -44
- cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
- cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
- cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
- cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
- cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
- cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/WHEEL +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/top_level.txt +0 -0
cartography/intel/aws/secretsmanager.py
CHANGED

@@ -7,6 +7,7 @@ import neo4j

 from cartography.client.core.tx import load
 from cartography.graph.job import GraphJob
+from cartography.models.aws.secretsmanager.secret import SecretsManagerSecretSchema
 from cartography.models.aws.secretsmanager.secret_version import (
     SecretsManagerSecretVersionSchema,
 )

@@ -14,7 +15,6 @@ from cartography.stats import get_stats_client
 from cartography.util import aws_handle_regions
 from cartography.util import dict_date_to_epoch
 from cartography.util import merge_module_sync_metadata
-from cartography.util import run_cleanup_job
 from cartography.util import timeit

 logger = logging.getLogger(__name__)

@@ -32,6 +32,37 @@ def get_secret_list(boto3_session: boto3.session.Session, region: str) -> List[D
     return secrets


+def transform_secrets(
+    secrets: List[Dict],
+) -> List[Dict]:
+    """
+    Transform AWS Secrets Manager Secrets to match the data model.
+    """
+    transformed_data = []
+    for secret in secrets:
+        # Start with a copy of the original secret data
+        transformed = dict(secret)
+
+        # Convert date fields to epoch timestamps
+        transformed["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
+        transformed["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
+        transformed["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
+        transformed["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
+        transformed["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
+
+        # Flatten nested RotationRules.AutomaticallyAfterDays property
+        if "RotationRules" in secret and secret["RotationRules"]:
+            rotation_rules = secret["RotationRules"]
+            if "AutomaticallyAfterDays" in rotation_rules:
+                transformed["RotationRulesAutomaticallyAfterDays"] = rotation_rules[
+                    "AutomaticallyAfterDays"
+                ]
+
+        transformed_data.append(transformed)
+
+    return transformed_data
+
+
 @timeit
 def load_secrets(
     neo4j_session: neo4j.Session,

@@ -40,48 +71,33 @@ def load_secrets(
     current_aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
-    … [12 removed lines not captured in this view: presumably the legacy docstring and the start of the inline Cypher ingest statement]
-        s.lastupdated = $aws_update_tag
-    WITH s
-    MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
-    MERGE (owner)-[r:RESOURCE]->(s)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag
-    """
-    for secret in data:
-        secret["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
-        secret["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
-        secret["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
-        secret["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
-        secret["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
-
-    neo4j_session.run(
-        ingest_secrets,
-        Secrets=data,
+    """
+    Load transformed secrets into Neo4j using the data model.
+    Expects data to already be transformed by transform_secrets().
+    """
+    logger.info(f"Loading {len(data)} Secrets for region {region} into graph.")
+
+    # Load using the schema-based approach
+    load(
+        neo4j_session,
+        SecretsManagerSecretSchema(),
+        data,
+        lastupdated=aws_update_tag,
         Region=region,
-        AWS_ACCOUNT_ID=current_aws_account_id,
-        aws_update_tag=aws_update_tag,
+        AWS_ID=current_aws_account_id,
     )


 @timeit
 def cleanup_secrets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
-    … [4 removed lines not captured in this view: presumably the legacy run_cleanup_job("aws_import_secrets_cleanup.json", ...) call]
+    """
+    Run Secrets cleanup job using the data model.
+    """
+    logger.debug("Running Secrets cleanup job.")
+    cleanup_job = GraphJob.from_node_schema(
+        SecretsManagerSecretSchema(), common_job_parameters
     )
+    cleanup_job.run(neo4j_session)


 @timeit

@@ -121,8 +137,6 @@ def get_secret_versions(

 def transform_secret_versions(
     versions: List[Dict],
-    region: str,
-    aws_account_id: str,
 ) -> List[Dict]:
     """
     Transform AWS Secrets Manager Secret Versions to match the data model.

@@ -203,7 +217,15 @@ def sync(
     )
     secrets = get_secret_list(boto3_session, region)

-    … [1 removed line not captured in this view: presumably the old direct load_secrets() call]
+    transformed_secrets = transform_secrets(secrets)
+
+    load_secrets(
+        neo4j_session,
+        transformed_secrets,
+        region,
+        current_aws_account_id,
+        update_tag,
+    )

     all_versions = []
     for secret in secrets:

@@ -216,11 +238,7 @@ def sync(
         )
         all_versions.extend(versions)

-    transformed_data = transform_secret_versions(
-        all_versions,
-        region,
-        current_aws_account_id,
-    )
+    transformed_data = transform_secret_versions(all_versions)

     load_secret_versions(
         neo4j_session,
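Note on the new transform/load split above: the flattening that transform_secrets() performs is easiest to see on a toy record. Below is a minimal, self-contained sketch; cartography.util.dict_date_to_epoch is replaced with a simplified stand-in and the sample ARN is made up, so this illustrates the shape of the transform rather than reproducing cartography's exact helper.

from datetime import datetime, timezone
from typing import Any, Dict, List, Optional


def dict_date_to_epoch(obj: Dict[str, Any], key: str) -> Optional[int]:
    # Stand-in for cartography.util.dict_date_to_epoch: epoch seconds or None.
    value = obj.get(key)
    return int(value.timestamp()) if isinstance(value, datetime) else None


def transform_secrets(secrets: List[Dict]) -> List[Dict]:
    # Same flattening as the new function in the diff, condensed for the demo.
    transformed_data = []
    for secret in secrets:
        transformed = dict(secret)
        for field in (
            "CreatedDate",
            "LastRotatedDate",
            "LastChangedDate",
            "LastAccessedDate",
            "DeletedDate",
        ):
            transformed[field] = dict_date_to_epoch(secret, field)
        rotation_rules = secret.get("RotationRules") or {}
        if "AutomaticallyAfterDays" in rotation_rules:
            transformed["RotationRulesAutomaticallyAfterDays"] = rotation_rules[
                "AutomaticallyAfterDays"
            ]
        transformed_data.append(transformed)
    return transformed_data


sample = {
    "ARN": "arn:aws:secretsmanager:us-east-1:123456789012:secret:demo-abc123",
    "Name": "demo",
    "CreatedDate": datetime(2024, 1, 1, tzinfo=timezone.utc),
    "RotationRules": {"AutomaticallyAfterDays": 30},
}
out = transform_secrets([sample])[0]
print(out["CreatedDate"])                          # 1704067200
print(out["RotationRulesAutomaticallyAfterDays"])  # 30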
cartography/intel/entra/groups.py
CHANGED

@@ -59,10 +59,29 @@ async def get_group_members(
     return user_ids, group_ids


+@timeit
+async def get_group_owners(client: GraphServiceClient, group_id: str) -> list[str]:
+    """Get owner user IDs for a given group."""
+    owner_ids: list[str] = []
+    request_builder = client.groups.by_group_id(group_id).owners
+    page = await request_builder.get()
+    while page:
+        if page.value:
+            for obj in page.value:
+                odata_type = getattr(obj, "odata_type", "")
+                if odata_type == "#microsoft.graph.user":
+                    owner_ids.append(obj.id)
+        if not page.odata_next_link:
+            break
+        page = await request_builder.with_url(page.odata_next_link).get()
+    return owner_ids
+
+
 def transform_groups(
     groups: list[Group],
     user_member_map: dict[str, list[str]],
     group_member_map: dict[str, list[str]],
+    group_owner_map: dict[str, list[str]],
 ) -> list[dict[str, Any]]:
     """Transform API responses into dictionaries for ingestion."""
     result: list[dict[str, Any]] = []

@@ -82,6 +101,7 @@ def transform_groups(
             "deleted_date_time": g.deleted_date_time,
             "member_ids": user_member_map.get(g.id, []),
             "member_group_ids": group_member_map.get(g.id, []),
+            "owner_ids": group_owner_map.get(g.id, []),
         }
         result.append(transformed)
     return result

@@ -134,6 +154,12 @@ async def sync_entra_groups(

     user_member_map: dict[str, list[str]] = {}
     group_member_map: dict[str, list[str]] = {}
+    group_owner_map: dict[str, list[str]] = {}
+
+    for group in groups:
+        owners = await get_group_owners(client, group.id)
+        group_owner_map[group.id] = owners
+
     for group in groups:
         try:
             users, subgroups = await get_group_members(client, group.id)

@@ -144,7 +170,9 @@ async def sync_entra_groups(
             user_member_map[group.id] = []
             group_member_map[group.id] = []

-    transformed_groups = transform_groups(groups, user_member_map, group_member_map)
+    transformed_groups = transform_groups(
+        groups, user_member_map, group_member_map, group_owner_map
+    )

     load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
     load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)
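The added get_group_owners() issues one owners request per group and is awaited sequentially in sync_entra_groups before the member pass. A usage sketch follows, assuming the msgraph-sdk and azure-identity packages this module builds on; the tenant, client, secret, and group IDs are placeholders, and any Entra app registration with Group.Read.All should be able to page owners this way.

import asyncio

from azure.identity.aio import ClientSecretCredential
from msgraph import GraphServiceClient

from cartography.intel.entra.groups import get_group_owners


async def main() -> None:
    # Placeholder credentials for an Entra app registration.
    credential = ClientSecretCredential(
        tenant_id="<tenant-id>",
        client_id="<client-id>",
        client_secret="<client-secret>",
    )
    client = GraphServiceClient(
        credential, scopes=["https://graph.microsoft.com/.default"]
    )
    # Pages through /groups/{id}/owners, keeping only #microsoft.graph.user IDs.
    owners = await get_group_owners(client, "<group-object-id>")
    print(owners)


asyncio.run(main())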
cartography/intel/gcp/__init__.py
CHANGED

@@ -391,6 +391,7 @@ def _sync_multiple_projects(
     # Compute data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
         logger.info("Syncing GCP project %s for Compute.", project_id)
         _sync_single_project_compute(
             neo4j_session,

@@ -399,10 +400,12 @@ def _sync_multiple_projects(
             gcp_update_tag,
             common_job_parameters,
         )
+        del common_job_parameters["PROJECT_ID"]

     # Storage data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
         logger.info("Syncing GCP project %s for Storage", project_id)
         _sync_single_project_storage(
             neo4j_session,

@@ -411,10 +414,12 @@ def _sync_multiple_projects(
             gcp_update_tag,
             common_job_parameters,
         )
+        del common_job_parameters["PROJECT_ID"]

     # GKE data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
         logger.info("Syncing GCP project %s for GKE", project_id)
         _sync_single_project_gke(
             neo4j_session,

@@ -423,10 +428,12 @@ def _sync_multiple_projects(
             gcp_update_tag,
             common_job_parameters,
         )
+        del common_job_parameters["PROJECT_ID"]

     # DNS data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
         logger.info("Syncing GCP project %s for DNS", project_id)
         _sync_single_project_dns(
             neo4j_session,

@@ -435,14 +442,17 @@ def _sync_multiple_projects(
             gcp_update_tag,
             common_job_parameters,
         )
+        del common_job_parameters["PROJECT_ID"]

     # IAM data sync
     for project in projects:
         project_id = project["projectId"]
+        common_job_parameters["PROJECT_ID"] = project_id
         logger.info("Syncing GCP project %s for IAM", project_id)
         _sync_single_project_iam(
             neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters
         )
+        del common_job_parameters["PROJECT_ID"]


 @timeit
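Each per-project loop now sets common_job_parameters["PROJECT_ID"] before the sync call and deletes it afterward, so schema-based cleanup jobs run scoped to one project at a time. One caveat of the set/del pattern: an exception inside a sync leaves the key behind. Below is a sketch of the same idea wrapped in a context manager; this is an alternative shape, not what the diff does, and scoped_param is a hypothetical helper.

from contextlib import contextmanager
from typing import Any, Dict, Iterator


@contextmanager
def scoped_param(params: Dict[str, Any], key: str, value: Any) -> Iterator[None]:
    # Stash a temporary job parameter and guarantee its removal, even on error.
    params[key] = value
    try:
        yield
    finally:
        del params[key]


common_job_parameters: Dict[str, Any] = {"UPDATE_TAG": 1704067200}
for project_id in ["project-a", "project-b"]:
    with scoped_param(common_job_parameters, "PROJECT_ID", project_id):
        pass  # a _sync_single_project_*() call would run here
assert "PROJECT_ID" not in common_job_parameters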
cartography/intel/gcp/compute.py
CHANGED

@@ -14,6 +14,9 @@ import neo4j
 from googleapiclient.discovery import HttpError
 from googleapiclient.discovery import Resource

+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.gcp.compute.vpc import GCPVpcSchema
 from cartography.util import run_cleanup_job
 from cartography.util import timeit

@@ -600,48 +603,17 @@ def load_gcp_instances(
 @timeit
 def load_gcp_vpcs(
     neo4j_session: neo4j.Session,
-    vpcs: … [rest of this removed line not captured in this view]
+    vpcs: list[dict[str, Any]],
     gcp_update_tag: int,
+    project_id: str,
 ) -> None:
-    … [7 removed lines not captured in this view: presumably the legacy docstring]
-    query = """
-    MERGE(p:GCPProject{id:$ProjectId})
-    ON CREATE SET p.firstseen = timestamp()
-    SET p.lastupdated = $gcp_update_tag
-
-    MERGE(vpc:GCPVpc{id:$PartialUri})
-    ON CREATE SET vpc.firstseen = timestamp(),
-    vpc.partial_uri = $PartialUri
-    SET vpc.self_link = $SelfLink,
-    vpc.name = $VpcName,
-    vpc.project_id = $ProjectId,
-    vpc.auto_create_subnetworks = $AutoCreateSubnetworks,
-    vpc.routing_config_routing_mode = $RoutingMode,
-    vpc.description = $Description,
-    vpc.lastupdated = $gcp_update_tag
-
-    MERGE (p)-[r:RESOURCE]->(vpc)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $gcp_update_tag
-    """
-    for vpc in vpcs:
-        neo4j_session.run(
-            query,
-            ProjectId=vpc["project_id"],
-            PartialUri=vpc["partial_uri"],
-            SelfLink=vpc["self_link"],
-            VpcName=vpc["name"],
-            AutoCreateSubnetworks=vpc["auto_create_subnetworks"],
-            RoutingMode=vpc["routing_config_routing_mode"],
-            Description=vpc["description"],
-            gcp_update_tag=gcp_update_tag,
-        )
+    load(
+        neo4j_session,
+        GCPVpcSchema(),
+        vpcs,
+        PROJECT_ID=project_id,
+        LASTUPDATED=gcp_update_tag,
+    )


 @timeit

@@ -1159,6 +1131,12 @@ def cleanup_gcp_vpcs(neo4j_session: neo4j.Session, common_job_parameters: Dict)
     :param common_job_parameters: dict of other job parameters to pass to Neo4j
     :return: Nothing
     """
+    GraphJob.from_node_schema(
+        GCPVpcSchema(),
+        common_job_parameters,
+    ).run(neo4j_session)
+
+    # TODO: remove this once we refactor GCP instances and add the instance to vpc rel as an object
     run_cleanup_job(
         "gcp_compute_vpc_cleanup.json",
         neo4j_session,

@@ -1267,8 +1245,7 @@ def sync_gcp_vpcs(
     """
     vpc_res = get_gcp_vpcs(project_id, compute)
     vpcs = transform_gcp_vpcs(vpc_res)
-    load_gcp_vpcs(neo4j_session, vpcs, gcp_update_tag)
-    # TODO scope the cleanup to the current project - https://github.com/cartography-cncf/cartography/issues/381
+    load_gcp_vpcs(neo4j_session, vpcs, gcp_update_tag, project_id)
     cleanup_gcp_vpcs(neo4j_session, common_job_parameters)
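The deleted Cypher in load_gcp_vpcs() doubles as documentation of the record shape: it enumerates the keys each transformed VPC dict carries, which GCPVpcSchema now maps declaratively. A sketch of one such record follows; the values are invented and the keys are inferred from the removed vpc["..."] lookups, not read from the new schema file.

# Hypothetical record; keys mirror the vpc["..."] lookups in the removed loop.
vpc_record = {
    "partial_uri": "projects/my-project/global/networks/default",
    "self_link": "https://www.googleapis.com/compute/v1/projects/my-project/global/networks/default",
    "name": "default",
    "project_id": "my-project",
    "auto_create_subnetworks": True,
    "routing_config_routing_mode": "REGIONAL",
    "description": "Default network for the project",
}

# The schema-based call replaces ~40 lines of hand-written Cypher, e.g.:
# load(neo4j_session, GCPVpcSchema(), [vpc_record],
#      PROJECT_ID="my-project", LASTUPDATED=1704067200)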
cartography/intel/trivy/__init__.py
CHANGED

@@ -1,3 +1,4 @@
+import json
 import logging
 from typing import Any

@@ -8,7 +9,9 @@ from cartography.client.aws import list_accounts
 from cartography.client.aws.ecr import get_ecr_images
 from cartography.config import Config
 from cartography.intel.trivy.scanner import cleanup
+from cartography.intel.trivy.scanner import get_json_files_in_dir
 from cartography.intel.trivy.scanner import get_json_files_in_s3
+from cartography.intel.trivy.scanner import sync_single_image_from_file
 from cartography.intel.trivy.scanner import sync_single_image_from_s3
 from cartography.stats import get_stats_client
 from cartography.util import timeit

@@ -39,13 +42,13 @@ def get_scan_targets(


 def _get_intersection(
-    … [removed line not captured in this view]
+    image_uris: set[str], json_files: set[str], trivy_s3_prefix: str
 ) -> list[tuple[str, str]]:
     """
     Get the intersection of ECR images in the graph and S3 scan results.

     Args:
-        … [removed line not captured in this view]
+        image_uris: Set of ECR images in the graph
         json_files: Set of S3 object keys for JSON files
         trivy_s3_prefix: S3 prefix path containing scan results

@@ -60,7 +63,7 @@ def _get_intersection(
         # Remove the prefix and the .json suffix
         image_uri = s3_object_key[prefix_len:-5]

-        if image_uri in
+        if image_uri in image_uris:
             intersection.append((image_uri, s3_object_key))

     return intersection

@@ -90,12 +93,12 @@ def sync_trivy_aws_ecr_from_s3(
         f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
     )

-    … [removed line not captured in this view]
+    image_uris: set[str] = get_scan_targets(neo4j_session)
     json_files: set[str] = get_json_files_in_s3(
         trivy_s3_bucket, trivy_s3_prefix, boto3_session
     )
     intersection: list[tuple[str, str]] = _get_intersection(
-        … [removed line not captured in this view]
+        image_uris, json_files, trivy_s3_prefix
     )

     if len(intersection) == 0:

@@ -124,21 +127,79 @@ def sync_trivy_aws_ecr_from_s3(
     cleanup(neo4j_session, common_job_parameters)


+@timeit
+def sync_trivy_aws_ecr_from_dir(
+    neo4j_session: Session,
+    results_dir: str,
+    update_tag: int,
+    common_job_parameters: dict[str, Any],
+) -> None:
+    """Sync Trivy scan results from local files for AWS ECR images."""
+    logger.info(f"Using Trivy scan results from {results_dir}")
+
+    image_uris: set[str] = get_scan_targets(neo4j_session)
+    json_files: set[str] = get_json_files_in_dir(results_dir)
+
+    if not json_files:
+        logger.error(
+            f"Trivy sync was configured, but no json files were found in {results_dir}."
+        )
+        raise ValueError("No Trivy json results found on disk")
+
+    logger.info(f"Processing {len(json_files)} local Trivy result files")
+
+    for file_path in json_files:
+        # First, check if the image exists in the graph before syncing
+        try:
+            # Peek at the artifact name without processing the file
+            with open(file_path, encoding="utf-8") as f:
+                trivy_data = json.load(f)
+            artifact_name = trivy_data.get("ArtifactName")
+
+            if artifact_name and artifact_name not in image_uris:
+                logger.debug(
+                    f"Skipping results for {artifact_name} since the image is not present in the graph"
+                )
+                continue
+
+        except (json.JSONDecodeError, KeyError) as e:
+            logger.error(f"Failed to read artifact name from {file_path}: {e}")
+            continue
+
+        # Now sync the file since we know the image exists in the graph
+        sync_single_image_from_file(
+            neo4j_session,
+            file_path,
+            update_tag,
+        )
+
+    cleanup(neo4j_session, common_job_parameters)
+
+
 @timeit
 def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
-    """
-    Start Trivy scan ingestion from S3.
+    """Start Trivy scan ingestion from S3 or local files.

     Args:
         neo4j_session: Neo4j session for database operations
-        config: Configuration object containing S3
+        config: Configuration object containing S3 or directory paths
     """
-    … [3 removed lines not captured in this view: presumably the old S3-only configuration guard]
+    if not config.trivy_s3_bucket and not config.trivy_results_dir:
+        logger.info("Trivy configuration not provided. Skipping Trivy ingestion.")
+        return
+
+    if config.trivy_results_dir:
+        common_job_parameters = {
+            "UPDATE_TAG": config.update_tag,
+        }
+        sync_trivy_aws_ecr_from_dir(
+            neo4j_session,
+            config.trivy_results_dir,
+            config.update_tag,
+            common_job_parameters,
+        )
        return

-    # Default to empty string if s3 prefix is not provided
     if config.trivy_s3_prefix is None:
         config.trivy_s3_prefix = ""

@@ -146,7 +207,6 @@ def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
         "UPDATE_TAG": config.update_tag,
     }

-    # Get ECR images to scan
     boto3_session = boto3.Session()

     sync_trivy_aws_ecr_from_s3(
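The new local-directory path in sync_trivy_aws_ecr_from_dir() peeks at each report's ArtifactName and skips files whose image is absent from the graph. A standalone sketch of that gate follows; the directory name and image URI are invented, and the glob stands in for get_json_files_in_dir(), whose exact behavior is not shown in this diff.

import json
from pathlib import Path

# Hypothetical inputs: a local results dir and the image URIs already in the graph.
results_dir = Path("trivy-results")
image_uris = {"123456789012.dkr.ecr.us-east-1.amazonaws.com/app:1.0"}

for path in sorted(results_dir.glob("*.json")):
    try:
        artifact_name = json.loads(path.read_text(encoding="utf-8")).get("ArtifactName")
    except json.JSONDecodeError:
        continue  # unreadable report: skip it, as the sync loop does
    if artifact_name in image_uris:
        print(f"would sync {path} for {artifact_name}")
    else:
        print(f"skipping {path}: {artifact_name} not in graph")

The config.trivy_results_dir attribute that drives this path is presumably surfaced by the cli.py and config.py additions listed in the file manifest at the top of this diff.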