cartography 0.104.0rc3__py3-none-any.whl → 0.106.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (75)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +26 -1
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/config.py +8 -0
  6. cartography/data/indexes.cypher +0 -37
  7. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
  8. cartography/graph/cleanupbuilder.py +151 -41
  9. cartography/intel/aws/acm.py +124 -0
  10. cartography/intel/aws/cloudtrail.py +3 -38
  11. cartography/intel/aws/ecr.py +8 -2
  12. cartography/intel/aws/ecs.py +228 -380
  13. cartography/intel/aws/efs.py +99 -11
  14. cartography/intel/aws/iam.py +1 -1
  15. cartography/intel/aws/identitycenter.py +14 -3
  16. cartography/intel/aws/inspector.py +106 -53
  17. cartography/intel/aws/lambda_function.py +1 -1
  18. cartography/intel/aws/rds.py +2 -1
  19. cartography/intel/aws/resources.py +2 -0
  20. cartography/intel/aws/s3.py +195 -4
  21. cartography/intel/aws/sqs.py +36 -90
  22. cartography/intel/entra/__init__.py +22 -0
  23. cartography/intel/entra/applications.py +366 -0
  24. cartography/intel/entra/groups.py +151 -0
  25. cartography/intel/entra/ou.py +21 -5
  26. cartography/intel/kubernetes/__init__.py +30 -14
  27. cartography/intel/kubernetes/clusters.py +86 -0
  28. cartography/intel/kubernetes/namespaces.py +59 -57
  29. cartography/intel/kubernetes/pods.py +140 -77
  30. cartography/intel/kubernetes/secrets.py +95 -45
  31. cartography/intel/kubernetes/services.py +131 -67
  32. cartography/intel/kubernetes/util.py +125 -14
  33. cartography/intel/trivy/__init__.py +161 -0
  34. cartography/intel/trivy/scanner.py +363 -0
  35. cartography/models/aws/acm/__init__.py +0 -0
  36. cartography/models/aws/acm/certificate.py +75 -0
  37. cartography/models/aws/cloudtrail/trail.py +24 -0
  38. cartography/models/aws/ecs/__init__.py +0 -0
  39. cartography/models/aws/ecs/clusters.py +64 -0
  40. cartography/models/aws/ecs/container_definitions.py +93 -0
  41. cartography/models/aws/ecs/container_instances.py +84 -0
  42. cartography/models/aws/ecs/containers.py +80 -0
  43. cartography/models/aws/ecs/services.py +117 -0
  44. cartography/models/aws/ecs/task_definitions.py +97 -0
  45. cartography/models/aws/ecs/tasks.py +110 -0
  46. cartography/models/aws/efs/file_system.py +60 -0
  47. cartography/models/aws/efs/mount_target.py +29 -2
  48. cartography/models/aws/s3/notification.py +24 -0
  49. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  50. cartography/models/aws/sqs/__init__.py +0 -0
  51. cartography/models/aws/sqs/queue.py +89 -0
  52. cartography/models/core/nodes.py +15 -2
  53. cartography/models/entra/app_role_assignment.py +115 -0
  54. cartography/models/entra/application.py +47 -0
  55. cartography/models/entra/group.py +91 -0
  56. cartography/models/kubernetes/__init__.py +0 -0
  57. cartography/models/kubernetes/clusters.py +26 -0
  58. cartography/models/kubernetes/containers.py +108 -0
  59. cartography/models/kubernetes/namespaces.py +51 -0
  60. cartography/models/kubernetes/pods.py +80 -0
  61. cartography/models/kubernetes/secrets.py +79 -0
  62. cartography/models/kubernetes/services.py +108 -0
  63. cartography/models/trivy/__init__.py +0 -0
  64. cartography/models/trivy/findings.py +66 -0
  65. cartography/models/trivy/fix.py +66 -0
  66. cartography/models/trivy/package.py +71 -0
  67. cartography/sync.py +2 -0
  68. cartography/util.py +15 -10
  69. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/METADATA +3 -2
  70. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/RECORD +74 -40
  71. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  72. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/WHEEL +0 -0
  73. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/entry_points.txt +0 -0
  74. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/licenses/LICENSE +0 -0
  75. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ import neo4j
9
9
  from cartography.client.core.tx import load
10
10
  from cartography.graph.job import GraphJob
11
11
  from cartography.intel.aws.ec2.util import get_botocore_config
12
+ from cartography.models.aws.efs.file_system import EfsFileSystemSchema
12
13
  from cartography.models.aws.efs.mount_target import EfsMountTargetSchema
13
14
  from cartography.util import aws_handle_regions
14
15
  from cartography.util import timeit
@@ -18,16 +19,71 @@ logger = logging.getLogger(__name__)
18
19
 
19
20
  @timeit
20
21
  @aws_handle_regions
21
- def get_efs_mount_targets(
22
+ def get_efs_file_systems(
22
23
  boto3_session: boto3.Session, region: str
23
24
  ) -> List[Dict[str, Any]]:
24
25
  client = boto3_session.client(
25
26
  "efs", region_name=region, config=get_botocore_config()
26
27
  )
28
+ paginator = client.get_paginator("describe_file_systems")
29
+ fileSystems = []
30
+ for page in paginator.paginate():
31
+ fileSystems.extend(page.get("FileSystems", []))
32
+
33
+ return fileSystems
34
+
35
+
36
+ def transform_efs_file_systems(
37
+ fileSystems: List[Dict[str, Any]], region: str
38
+ ) -> List[Dict[str, Any]]:
39
+ """
40
+ Transform SNS topic data for ingestion
41
+ """
42
+ transformed_file_systems = []
43
+ for file_system in fileSystems:
44
+ transformed_file_system = {
45
+ "FileSystemId": file_system["FileSystemId"],
46
+ "FileSystemArn": file_system["FileSystemArn"],
47
+ "OwnerId": file_system.get("OwnerId"),
48
+ "CreationToken": file_system.get("CreationToken"),
49
+ "CreationTime": file_system.get("CreationTime"),
50
+ "LifeCycleState": file_system.get("LifeCycleState"),
51
+ "Name": file_system.get("Name"),
52
+ "NumberOfMountTargets": file_system.get("NumberOfMountTargets"),
53
+ "SizeInBytesValue": file_system.get("SizeInBytes", {}).get("Value"),
54
+ "SizeInBytesTimestamp": file_system.get("SizeInBytes", {}).get("Timestamp"),
55
+ "PerformanceMode": file_system.get("PerformanceMode"),
56
+ "Encrypted": file_system.get("Encrypted"),
57
+ "KmsKeyId": file_system.get("KmsKeyId"),
58
+ "ThroughputMode": file_system.get("ThroughputMode"),
59
+ "AvailabilityZoneName": file_system.get("AvailabilityZoneName"),
60
+ "AvailabilityZoneId": file_system.get("AvailabilityZoneId"),
61
+ "FileSystemProtection": file_system.get("FileSystemProtection", {}).get(
62
+ "ReplicationOverwriteProtection"
63
+ ),
64
+ }
65
+ transformed_file_systems.append(transformed_file_system)
66
+
67
+ return transformed_file_systems
68
+
69
+
70
+ @timeit
71
+ @aws_handle_regions
72
+ def get_efs_mount_targets(
73
+ fileSystems: List[Dict[str, Any]], boto3_session: boto3.Session, region: str
74
+ ) -> List[Dict[str, Any]]:
75
+ client = boto3_session.client(
76
+ "efs", region_name=region, config=get_botocore_config()
77
+ )
78
+ file_system_ids = []
79
+ for file_system in fileSystems:
80
+ file_system_ids.append(file_system["FileSystemId"])
27
81
  paginator = client.get_paginator("describe_mount_targets")
28
82
  mountTargets = []
29
- for page in paginator.paginate():
30
- mountTargets.extend(page["MountTargets"])
83
+ for fs_id in file_system_ids:
84
+ for page in paginator.paginate(FileSystemId=fs_id):
85
+ mountTargets.extend(page.get("MountTargets", []))
86
+
31
87
  return mountTargets
32
88
 
33
89
 
@@ -52,16 +108,39 @@ def load_efs_mount_targets(
52
108
  )
53
109
 
54
110
 
111
+ @timeit
112
+ def load_efs_file_systems(
113
+ neo4j_session: neo4j.Session,
114
+ data: List[Dict[str, Any]],
115
+ region: str,
116
+ current_aws_account_id: str,
117
+ aws_update_tag: int,
118
+ ) -> None:
119
+ logger.info(
120
+ f"Loading Efs {len(data)} file systems for region '{region}' into graph.",
121
+ )
122
+ load(
123
+ neo4j_session,
124
+ EfsFileSystemSchema(),
125
+ data,
126
+ lastupdated=aws_update_tag,
127
+ Region=region,
128
+ AWS_ID=current_aws_account_id,
129
+ )
130
+
131
+
55
132
  @timeit
56
133
  def cleanup(
57
134
  neo4j_session: neo4j.Session,
58
135
  common_job_parameters: Dict[str, Any],
59
136
  ) -> None:
60
137
  logger.debug("Running Efs cleanup job.")
61
- cleanup_job = GraphJob.from_node_schema(
62
- EfsMountTargetSchema(), common_job_parameters
138
+ GraphJob.from_node_schema(EfsMountTargetSchema(), common_job_parameters).run(
139
+ neo4j_session
140
+ )
141
+ GraphJob.from_node_schema(EfsFileSystemSchema(), common_job_parameters).run(
142
+ neo4j_session
63
143
  )
64
- cleanup_job.run(neo4j_session)
65
144
 
66
145
 
67
146
  @timeit
@@ -77,14 +156,23 @@ def sync(
77
156
  logger.info(
78
157
  f"Syncing Efs for region '{region}' in account '{current_aws_account_id}'.",
79
158
  )
80
- mountTargets = get_efs_mount_targets(boto3_session, region)
81
- mount_target_data: List[Dict[str, Any]] = []
82
- for mountTarget in mountTargets:
83
- mount_target_data.append(mountTarget)
159
+
160
+ fileSystems = get_efs_file_systems(boto3_session, region)
161
+ tranformed_file_systems = transform_efs_file_systems(fileSystems, region)
162
+
163
+ load_efs_file_systems(
164
+ neo4j_session,
165
+ tranformed_file_systems,
166
+ region,
167
+ current_aws_account_id,
168
+ update_tag,
169
+ )
170
+
171
+ mountTargets = get_efs_mount_targets(fileSystems, boto3_session, region)
84
172
 
85
173
  load_efs_mount_targets(
86
174
  neo4j_session,
87
- mount_target_data,
175
+ mountTargets,
88
176
  region,
89
177
  current_aws_account_id,
90
178
  update_tag,
@@ -507,7 +507,7 @@ def sync_assumerole_relationships(
507
507
  common_job_parameters: Dict,
508
508
  ) -> None:
509
509
  # Must be called after load_role
510
- # Computes and syncs the STS_ASSUME_ROLE allow relationship
510
+ # Computes and syncs the STS_ASSUMEROLE_ALLOW relationship
511
511
  logger.info(
512
512
  "Syncing assume role mappings for account '%s'.",
513
513
  current_aws_account_id,
@@ -140,13 +140,23 @@ def get_sso_users(
140
140
  for page in paginator.paginate(IdentityStoreId=identity_store_id):
141
141
  user_page = page.get("Users", [])
142
142
  for user in user_page:
143
- if user.get("ExternalIds", None):
144
- user["ExternalId"] = user.get("ExternalIds")[0].get("Id")
145
143
  users.append(user)
146
144
 
147
145
  return users
148
146
 
149
147
 
148
+ def transform_sso_users(users: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
149
+ """
150
+ Transform SSO users to match the expected schema
151
+ """
152
+ transformed_users = []
153
+ for user in users:
154
+ if user.get("ExternalIds") is not None:
155
+ user["ExternalId"] = user["ExternalIds"][0].get("Id")
156
+ transformed_users.append(user)
157
+ return transformed_users
158
+
159
+
150
160
  @timeit
151
161
  def load_sso_users(
152
162
  neo4j_session: neo4j.Session,
@@ -300,9 +310,10 @@ def sync_identity_center_instances(
300
310
  )
301
311
 
302
312
  users = get_sso_users(boto3_session, identity_store_id, region)
313
+ transformed_users = transform_sso_users(users)
303
314
  load_sso_users(
304
315
  neo4j_session,
305
- users,
316
+ transformed_users,
306
317
  identity_store_id,
307
318
  region,
308
319
  current_aws_account_id,
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  from typing import Any
3
3
  from typing import Dict
4
+ from typing import Iterator
4
5
  from typing import List
5
6
  from typing import Tuple
6
7
 
@@ -13,11 +14,14 @@ from cartography.models.aws.inspector.findings import AWSInspectorFindingSchema
13
14
  from cartography.models.aws.inspector.packages import AWSInspectorPackageSchema
14
15
  from cartography.util import aws_handle_regions
15
16
  from cartography.util import aws_paginate
17
+ from cartography.util import batch
16
18
  from cartography.util import timeit
19
+ from cartography.util import to_asynchronous
20
+ from cartography.util import to_synchronous
17
21
 
18
22
  logger = logging.getLogger(__name__)
19
23
 
20
- # As of 7/22/24, Inspector is only available in the below regions. We will need to update this hardcoded list here over
24
+ # As of 7/1/25, Inspector is only available in the below regions. We will need to update this hardcoded list here over
21
25
  # time. :\ https://docs.aws.amazon.com/general/latest/gr/inspector2.html
22
26
  AWS_INSPECTOR_REGIONS = {
23
27
  "us-east-2",
@@ -43,54 +47,64 @@ AWS_INSPECTOR_REGIONS = {
43
47
  "eu-central-2",
44
48
  "me-south-1",
45
49
  "sa-east-1",
50
+ "us-gov-east-1",
51
+ "us-gov-west-1",
46
52
  }
47
53
 
54
+ BATCH_SIZE = 1000
55
+
56
+
57
+ @aws_handle_regions
58
+ def get_member_accounts(
59
+ session: boto3.session.Session,
60
+ region: str,
61
+ ) -> List[str]:
62
+ """
63
+ List all the accounts that have delegated access to the account specified by current_aws_account_id.
64
+ """
65
+ client = session.client("inspector2", region_name=region)
66
+ members = list(aws_paginate(client, "list_members", "members"))
67
+ accounts = [m["accountId"] for m in members]
68
+ return accounts
69
+
48
70
 
49
71
  @timeit
50
72
  @aws_handle_regions
51
73
  def get_inspector_findings(
52
74
  session: boto3.session.Session,
53
75
  region: str,
54
- current_aws_account_id: str,
55
- ) -> List[Dict[str, Any]]:
76
+ account_id: str,
77
+ ) -> Iterator[List[Dict[str, Any]]]:
56
78
  """
57
- We must list_findings by filtering the request, otherwise the request could tiemout.
79
+ Query inspector2.list_findings by filtering the request, otherwise the request could timeout.
58
80
  First, we filter by account_id. And since there may be millions of CLOSED findings that may never go away,
59
- we will only fetch those in ACTIVE or SUPPRESSED statuses.
60
- list_members will get us all the accounts that
61
- have delegated access to the account specified by current_aws_account_id.
81
+ only fetch those in ACTIVE or SUPPRESSED statuses.
82
+ Run the query in batches of 1000 findings and return an iterator to fetch the results.
62
83
  """
63
84
  client = session.client("inspector2", region_name=region)
64
-
65
- members = aws_paginate(client, "list_members", "members")
66
- # the current host account may not be considered a "member", but we still fetch its findings
67
- accounts = [current_aws_account_id] + [m["accountId"] for m in members]
68
-
69
- findings = []
70
- for account in accounts:
71
- logger.info(f"Getting findings for member account {account} in region {region}")
72
- findings.extend(
73
- aws_paginate(
74
- client,
75
- "list_findings",
76
- "findings",
77
- filterCriteria={
78
- "awsAccountId": [
79
- {
80
- "comparison": "EQUALS",
81
- "value": account,
82
- },
83
- ],
84
- "findingStatus": [
85
- {
86
- "comparison": "NOT_EQUALS",
87
- "value": "CLOSED",
88
- },
89
- ],
85
+ logger.info(
86
+ f"Getting findings in batches of {BATCH_SIZE} for account {account_id} in region {region}"
87
+ )
88
+ aws_args: Dict[str, Any] = {
89
+ "filterCriteria": {
90
+ "awsAccountId": [
91
+ {
92
+ "comparison": "EQUALS",
93
+ "value": account_id,
90
94
  },
91
- ),
92
- )
93
- return findings
95
+ ],
96
+ "findingStatus": [
97
+ {
98
+ "comparison": "NOT_EQUALS",
99
+ "value": "CLOSED",
100
+ },
101
+ ],
102
+ }
103
+ }
104
+ findings_batches = batch(
105
+ aws_paginate(client, "list_findings", "findings", None, **aws_args), BATCH_SIZE
106
+ )
107
+ yield from findings_batches
94
108
 
95
109
 
96
110
  def transform_inspector_findings(
@@ -260,26 +274,24 @@ def cleanup(
260
274
  )
261
275
 
262
276
 
263
- @timeit
264
- def sync(
277
+ def _sync_findings_for_account(
265
278
  neo4j_session: neo4j.Session,
266
279
  boto3_session: boto3.session.Session,
267
- regions: List[str],
268
- current_aws_account_id: str,
280
+ region: str,
281
+ account_id: str,
269
282
  update_tag: int,
270
- common_job_parameters: Dict[str, Any],
283
+ current_aws_account_id: str,
271
284
  ) -> None:
272
- inspector_regions = [
273
- region for region in regions if region in AWS_INSPECTOR_REGIONS
274
- ]
275
-
276
- for region in inspector_regions:
277
- logger.info(
278
- f"Syncing AWS Inspector findings for account {current_aws_account_id} and region {region}",
279
- )
280
- findings = get_inspector_findings(boto3_session, region, current_aws_account_id)
281
- finding_data, package_data = transform_inspector_findings(findings)
282
- logger.info(f"Loading {len(finding_data)} findings")
285
+ """
286
+ Syncs the findings for a given account in a given region.
287
+ """
288
+ findings = get_inspector_findings(boto3_session, region, account_id)
289
+ if not findings:
290
+ logger.info(f"No findings to sync for account {account_id} in region {region}")
291
+ return
292
+ for f_batch in findings:
293
+ finding_data, package_data = transform_inspector_findings(f_batch)
294
+ logger.info(f"Loading {len(finding_data)} findings from account {account_id}")
283
295
  load_inspector_findings(
284
296
  neo4j_session,
285
297
  finding_data,
@@ -295,4 +307,45 @@ def sync(
295
307
  update_tag,
296
308
  current_aws_account_id,
297
309
  )
298
- cleanup(neo4j_session, common_job_parameters)
310
+
311
+
312
+ @timeit
313
+ def sync(
314
+ neo4j_session: neo4j.Session,
315
+ boto3_session: boto3.session.Session,
316
+ regions: List[str],
317
+ current_aws_account_id: str,
318
+ update_tag: int,
319
+ common_job_parameters: Dict[str, Any],
320
+ ) -> None:
321
+ inspector_regions = [
322
+ region for region in regions if region in AWS_INSPECTOR_REGIONS
323
+ ]
324
+
325
+ for region in inspector_regions:
326
+ logger.info(
327
+ f"Syncing AWS Inspector findings delegated to account {current_aws_account_id} and region {region}",
328
+ )
329
+ member_accounts = get_member_accounts(boto3_session, region)
330
+ # the current host account may not be considered a "member", but we still fetch its findings
331
+ member_accounts.append(current_aws_account_id)
332
+
333
+ async def async_ingest_findings_for_account(account_id: str) -> None:
334
+ await to_asynchronous(
335
+ _sync_findings_for_account,
336
+ neo4j_session,
337
+ boto3_session,
338
+ region,
339
+ account_id,
340
+ update_tag,
341
+ current_aws_account_id,
342
+ )
343
+
344
+ to_synchronous(
345
+ *[
346
+ async_ingest_findings_for_account(account_id)
347
+ for account_id in member_accounts
348
+ ]
349
+ )
350
+
351
+ cleanup(neo4j_session, common_job_parameters)
@@ -74,7 +74,7 @@ def load_lambda_functions(
74
74
  SET r.lastupdated = $aws_update_tag
75
75
  WITH lambda, lf
76
76
  MATCH (role:AWSPrincipal{arn: lf.Role})
77
- MERGE (lambda)-[r:STS_ASSUME_ROLE_ALLOW]->(role)
77
+ MERGE (lambda)-[r:STS_ASSUMEROLE_ALLOW]->(role)
78
78
  ON CREATE SET r.firstseen = timestamp()
79
79
  SET r.lastupdated = $aws_update_tag
80
80
  """
@@ -263,7 +263,8 @@ def get_rds_snapshot_data(
263
263
  Create an RDS boto3 client and grab all the DBSnapshots.
264
264
  """
265
265
  client = boto3_session.client("rds", region_name=region)
266
- return aws_paginate(client, "describe_db_snapshots", "DBSnapshots")
266
+ snapshots = list(aws_paginate(client, "describe_db_snapshots", "DBSnapshots"))
267
+ return snapshots
267
268
 
268
269
 
269
270
  @timeit
@@ -3,6 +3,7 @@ from typing import Dict
3
3
 
4
4
  from cartography.intel.aws.ec2.route_tables import sync_route_tables
5
5
 
6
+ from . import acm
6
7
  from . import apigateway
7
8
  from . import cloudtrail
8
9
  from . import cloudwatch
@@ -100,6 +101,7 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
100
101
  "sns": sns.sync,
101
102
  "sqs": sqs.sync,
102
103
  "ssm": ssm.sync,
104
+ "acm:certificate": acm.sync,
103
105
  "inspector": inspector.sync,
104
106
  "config": config.sync,
105
107
  "identitycenter": identitycenter.sync_identity_center_instances,