cartography 0.105.0__py3-none-any.whl → 0.106.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (45) hide show
  1. cartography/_version.py +2 -2
  2. cartography/data/indexes.cypher +0 -34
  3. cartography/intel/aws/ecs.py +228 -380
  4. cartography/intel/aws/efs.py +181 -0
  5. cartography/intel/aws/identitycenter.py +14 -3
  6. cartography/intel/aws/inspector.py +106 -53
  7. cartography/intel/aws/rds.py +2 -1
  8. cartography/intel/aws/resources.py +2 -0
  9. cartography/intel/entra/__init__.py +11 -0
  10. cartography/intel/entra/applications.py +366 -0
  11. cartography/intel/kubernetes/__init__.py +30 -14
  12. cartography/intel/kubernetes/clusters.py +86 -0
  13. cartography/intel/kubernetes/namespaces.py +59 -57
  14. cartography/intel/kubernetes/pods.py +140 -77
  15. cartography/intel/kubernetes/secrets.py +95 -45
  16. cartography/intel/kubernetes/services.py +131 -67
  17. cartography/intel/kubernetes/util.py +125 -14
  18. cartography/models/aws/ecs/__init__.py +0 -0
  19. cartography/models/aws/ecs/clusters.py +64 -0
  20. cartography/models/aws/ecs/container_definitions.py +93 -0
  21. cartography/models/aws/ecs/container_instances.py +84 -0
  22. cartography/models/aws/ecs/containers.py +80 -0
  23. cartography/models/aws/ecs/services.py +117 -0
  24. cartography/models/aws/ecs/task_definitions.py +97 -0
  25. cartography/models/aws/ecs/tasks.py +110 -0
  26. cartography/models/aws/efs/__init__.py +0 -0
  27. cartography/models/aws/efs/file_system.py +60 -0
  28. cartography/models/aws/efs/mount_target.py +79 -0
  29. cartography/models/entra/app_role_assignment.py +115 -0
  30. cartography/models/entra/application.py +47 -0
  31. cartography/models/kubernetes/__init__.py +0 -0
  32. cartography/models/kubernetes/clusters.py +26 -0
  33. cartography/models/kubernetes/containers.py +108 -0
  34. cartography/models/kubernetes/namespaces.py +51 -0
  35. cartography/models/kubernetes/pods.py +80 -0
  36. cartography/models/kubernetes/secrets.py +79 -0
  37. cartography/models/kubernetes/services.py +108 -0
  38. cartography/util.py +15 -10
  39. {cartography-0.105.0.dist-info → cartography-0.106.0rc1.dist-info}/METADATA +1 -1
  40. {cartography-0.105.0.dist-info → cartography-0.106.0rc1.dist-info}/RECORD +44 -22
  41. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  42. {cartography-0.105.0.dist-info → cartography-0.106.0rc1.dist-info}/WHEEL +0 -0
  43. {cartography-0.105.0.dist-info → cartography-0.106.0rc1.dist-info}/entry_points.txt +0 -0
  44. {cartography-0.105.0.dist-info → cartography-0.106.0rc1.dist-info}/licenses/LICENSE +0 -0
  45. {cartography-0.105.0.dist-info → cartography-0.106.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,181 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.aws.ec2.util import get_botocore_config
12
+ from cartography.models.aws.efs.file_system import EfsFileSystemSchema
13
+ from cartography.models.aws.efs.mount_target import EfsMountTargetSchema
14
+ from cartography.util import aws_handle_regions
15
+ from cartography.util import timeit
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@timeit
@aws_handle_regions
def get_efs_file_systems(
    boto3_session: boto3.Session, region: str
) -> List[Dict[str, Any]]:
    """
    Return every EFS file system description found in the given region.

    Walks all pages of the ``describe_file_systems`` paginator and flattens the
    ``FileSystems`` entries of each page into one list. A page without that key
    contributes nothing.
    """
    efs_client = boto3_session.client(
        "efs",
        region_name=region,
        config=get_botocore_config(),
    )
    pages = efs_client.get_paginator("describe_file_systems").paginate()
    return [
        file_system
        for page in pages
        for file_system in page.get("FileSystems", [])
    ]
34
+
35
+
36
def transform_efs_file_systems(
    fileSystems: List[Dict[str, Any]], region: str
) -> List[Dict[str, Any]]:
    """
    Flatten raw EFS file system descriptions into the records used for ingestion.

    :param fileSystems: file system dicts; each one must contain ``FileSystemId``
        and ``FileSystemArn``. Every other field is optional and becomes ``None``
        in the output when absent.
    :param region: not used by the transformation; kept so the signature stays
        compatible with existing callers.
    :return: one flat dict per input file system, in input order. The nested
        ``SizeInBytes`` and ``FileSystemProtection`` structures are unpacked into
        ``SizeInBytesValue``, ``SizeInBytesTimestamp`` and ``FileSystemProtection``.
    :raises KeyError: if an entry lacks ``FileSystemId`` or ``FileSystemArn``.
    """
    transformed_file_systems = []
    for file_system in fileSystems:
        # `or {}` covers both a missing key and a key that is present but None,
        # so the nested lookups below can never hit AttributeError.
        size_in_bytes = file_system.get("SizeInBytes") or {}
        protection = file_system.get("FileSystemProtection") or {}
        transformed_file_systems.append(
            {
                "FileSystemId": file_system["FileSystemId"],
                "FileSystemArn": file_system["FileSystemArn"],
                "OwnerId": file_system.get("OwnerId"),
                "CreationToken": file_system.get("CreationToken"),
                "CreationTime": file_system.get("CreationTime"),
                "LifeCycleState": file_system.get("LifeCycleState"),
                "Name": file_system.get("Name"),
                "NumberOfMountTargets": file_system.get("NumberOfMountTargets"),
                "SizeInBytesValue": size_in_bytes.get("Value"),
                "SizeInBytesTimestamp": size_in_bytes.get("Timestamp"),
                "PerformanceMode": file_system.get("PerformanceMode"),
                "Encrypted": file_system.get("Encrypted"),
                "KmsKeyId": file_system.get("KmsKeyId"),
                "ThroughputMode": file_system.get("ThroughputMode"),
                "AvailabilityZoneName": file_system.get("AvailabilityZoneName"),
                "AvailabilityZoneId": file_system.get("AvailabilityZoneId"),
                "FileSystemProtection": protection.get(
                    "ReplicationOverwriteProtection"
                ),
            }
        )

    return transformed_file_systems
68
+
69
+
70
@timeit
@aws_handle_regions
def get_efs_mount_targets(
    fileSystems: List[Dict[str, Any]], boto3_session: boto3.Session, region: str
) -> List[Dict[str, Any]]:
    """
    Return the mount targets of every given file system in the region.

    For each entry of ``fileSystems`` (each must carry ``FileSystemId``) the
    ``describe_mount_targets`` paginator is walked and the ``MountTargets``
    entries of every page are appended to a single flat list.
    """
    efs_client = boto3_session.client(
        "efs",
        region_name=region,
        config=get_botocore_config(),
    )
    # Gather all ids up front so a malformed entry fails before any mount
    # target request is issued.
    file_system_ids = [fs["FileSystemId"] for fs in fileSystems]
    mount_target_pager = efs_client.get_paginator("describe_mount_targets")
    return [
        mount_target
        for fs_id in file_system_ids
        for page in mount_target_pager.paginate(FileSystemId=fs_id)
        for mount_target in page.get("MountTargets", [])
    ]
88
+
89
+
90
@timeit
def load_efs_mount_targets(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Log how many mount targets are about to be written, then pass them to
    ``load`` together with an ``EfsMountTargetSchema`` instance.

    ``aws_update_tag``, ``region`` and ``current_aws_account_id`` are forwarded
    as the ``lastupdated``, ``Region`` and ``AWS_ID`` keyword arguments.
    """
    logger.info(
        f"Loading Efs {len(data)} mount targets for region '{region}' into graph.",
    )
    schema = EfsMountTargetSchema()
    load_kwargs = {
        "lastupdated": aws_update_tag,
        "Region": region,
        "AWS_ID": current_aws_account_id,
    }
    load(neo4j_session, schema, data, **load_kwargs)
109
+
110
+
111
@timeit
def load_efs_file_systems(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Log how many file systems are about to be written, then pass them to
    ``load`` together with an ``EfsFileSystemSchema`` instance.

    ``aws_update_tag``, ``region`` and ``current_aws_account_id`` are forwarded
    as the ``lastupdated``, ``Region`` and ``AWS_ID`` keyword arguments.
    """
    logger.info(
        f"Loading Efs {len(data)} file systems for region '{region}' into graph.",
    )
    schema = EfsFileSystemSchema()
    load_kwargs = {
        "lastupdated": aws_update_tag,
        "Region": region,
        "AWS_ID": current_aws_account_id,
    }
    load(neo4j_session, schema, data, **load_kwargs)
130
+
131
+
132
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Build and run one ``GraphJob`` per EFS node schema against the session.

    The jobs are created with ``GraphJob.from_node_schema`` from
    ``common_job_parameters`` and run in a fixed order: mount targets first,
    then file systems.
    """
    logger.debug("Running Efs cleanup job.")
    for schema_cls in (EfsMountTargetSchema, EfsFileSystemSchema):
        cleanup_job = GraphJob.from_node_schema(schema_cls(), common_job_parameters)
        cleanup_job.run(neo4j_session)
144
+
145
+
146
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync EFS file systems and their mount targets for each region, then clean up.

    Per region: fetch the file systems, transform and load them, then fetch the
    mount targets of those same file systems and load them. The cleanup jobs
    run once after all regions have been processed.
    """
    for region in regions:
        logger.info(
            f"Syncing Efs for region '{region}' in account '{current_aws_account_id}'.",
        )

        raw_file_systems = get_efs_file_systems(boto3_session, region)
        load_efs_file_systems(
            neo4j_session,
            transform_efs_file_systems(raw_file_systems, region),
            region,
            current_aws_account_id,
            update_tag,
        )

        # Mount targets are looked up from the raw (untransformed) descriptions.
        mount_targets = get_efs_mount_targets(raw_file_systems, boto3_session, region)
        load_efs_mount_targets(
            neo4j_session,
            mount_targets,
            region,
            current_aws_account_id,
            update_tag,
        )

    cleanup(neo4j_session, common_job_parameters)
@@ -140,13 +140,23 @@ def get_sso_users(
140
140
  for page in paginator.paginate(IdentityStoreId=identity_store_id):
141
141
  user_page = page.get("Users", [])
142
142
  for user in user_page:
143
- if user.get("ExternalIds", None):
144
- user["ExternalId"] = user.get("ExternalIds")[0].get("Id")
145
143
  users.append(user)
146
144
 
147
145
  return users
148
146
 
149
147
 
148
def transform_sso_users(users: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Transform SSO users to match the expected schema.

    When a user has at least one entry in ``ExternalIds``, the ``Id`` of the
    first entry is copied to a top-level ``ExternalId`` key. Users whose
    ``ExternalIds`` is missing, ``None`` or empty are passed through unchanged.

    :param users: user dicts; each dict is updated in place.
    :return: a new list holding the same (mutated) user dicts, in input order.
    """
    transformed_users = []
    for user in users:
        external_ids = user.get("ExternalIds")
        # Truthiness on purpose: an empty list must be skipped just like a
        # missing key or None, otherwise `[0]` raises IndexError.
        if external_ids:
            user["ExternalId"] = external_ids[0].get("Id")
        transformed_users.append(user)
    return transformed_users
158
+
159
+
150
160
  @timeit
151
161
  def load_sso_users(
152
162
  neo4j_session: neo4j.Session,
@@ -300,9 +310,10 @@ def sync_identity_center_instances(
300
310
  )
301
311
 
302
312
  users = get_sso_users(boto3_session, identity_store_id, region)
313
+ transformed_users = transform_sso_users(users)
303
314
  load_sso_users(
304
315
  neo4j_session,
305
- users,
316
+ transformed_users,
306
317
  identity_store_id,
307
318
  region,
308
319
  current_aws_account_id,
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  from typing import Any
3
3
  from typing import Dict
4
+ from typing import Iterator
4
5
  from typing import List
5
6
  from typing import Tuple
6
7
 
@@ -13,11 +14,14 @@ from cartography.models.aws.inspector.findings import AWSInspectorFindingSchema
13
14
  from cartography.models.aws.inspector.packages import AWSInspectorPackageSchema
14
15
  from cartography.util import aws_handle_regions
15
16
  from cartography.util import aws_paginate
17
+ from cartography.util import batch
16
18
  from cartography.util import timeit
19
+ from cartography.util import to_asynchronous
20
+ from cartography.util import to_synchronous
17
21
 
18
22
  logger = logging.getLogger(__name__)
19
23
 
20
- # As of 7/22/24, Inspector is only available in the below regions. We will need to update this hardcoded list here over
24
+ # As of 7/1/25, Inspector is only available in the below regions. We will need to update this hardcoded list here over
21
25
  # time. :\ https://docs.aws.amazon.com/general/latest/gr/inspector2.html
22
26
  AWS_INSPECTOR_REGIONS = {
23
27
  "us-east-2",
@@ -43,54 +47,64 @@ AWS_INSPECTOR_REGIONS = {
43
47
  "eu-central-2",
44
48
  "me-south-1",
45
49
  "sa-east-1",
50
+ "us-gov-east-1",
51
+ "us-gov-west-1",
46
52
  }
47
53
 
54
+ BATCH_SIZE = 1000
55
+
56
+
57
@aws_handle_regions
def get_member_accounts(
    session: boto3.session.Session,
    region: str,
) -> List[str]:
    """
    Return the account ids of the Inspector members listed in the given region.

    Reads the ``members`` entries produced by ``aws_paginate`` for the
    ``inspector2`` ``list_members`` call and extracts each entry's ``accountId``.
    """
    inspector_client = session.client("inspector2", region_name=region)
    return [
        member["accountId"]
        for member in aws_paginate(inspector_client, "list_members", "members")
    ]
69
+
48
70
 
49
71
  @timeit
50
72
  @aws_handle_regions
51
73
  def get_inspector_findings(
52
74
  session: boto3.session.Session,
53
75
  region: str,
54
- current_aws_account_id: str,
55
- ) -> List[Dict[str, Any]]:
76
+ account_id: str,
77
+ ) -> Iterator[List[Dict[str, Any]]]:
56
78
  """
57
- We must list_findings by filtering the request, otherwise the request could tiemout.
79
+ Query inspector2.list_findings by filtering the request, otherwise the request could timeout.
58
80
  First, we filter by account_id. And since there may be millions of CLOSED findings that may never go away,
59
- we will only fetch those in ACTIVE or SUPPRESSED statuses.
60
- list_members will get us all the accounts that
61
- have delegated access to the account specified by current_aws_account_id.
81
+ only fetch those in ACTIVE or SUPPRESSED statuses.
82
+ Run the query in batches of 1000 findings and return an iterator to fetch the results.
62
83
  """
63
84
  client = session.client("inspector2", region_name=region)
64
-
65
- members = aws_paginate(client, "list_members", "members")
66
- # the current host account may not be considered a "member", but we still fetch its findings
67
- accounts = [current_aws_account_id] + [m["accountId"] for m in members]
68
-
69
- findings = []
70
- for account in accounts:
71
- logger.info(f"Getting findings for member account {account} in region {region}")
72
- findings.extend(
73
- aws_paginate(
74
- client,
75
- "list_findings",
76
- "findings",
77
- filterCriteria={
78
- "awsAccountId": [
79
- {
80
- "comparison": "EQUALS",
81
- "value": account,
82
- },
83
- ],
84
- "findingStatus": [
85
- {
86
- "comparison": "NOT_EQUALS",
87
- "value": "CLOSED",
88
- },
89
- ],
85
+ logger.info(
86
+ f"Getting findings in batches of {BATCH_SIZE} for account {account_id} in region {region}"
87
+ )
88
+ aws_args: Dict[str, Any] = {
89
+ "filterCriteria": {
90
+ "awsAccountId": [
91
+ {
92
+ "comparison": "EQUALS",
93
+ "value": account_id,
90
94
  },
91
- ),
92
- )
93
- return findings
95
+ ],
96
+ "findingStatus": [
97
+ {
98
+ "comparison": "NOT_EQUALS",
99
+ "value": "CLOSED",
100
+ },
101
+ ],
102
+ }
103
+ }
104
+ findings_batches = batch(
105
+ aws_paginate(client, "list_findings", "findings", None, **aws_args), BATCH_SIZE
106
+ )
107
+ yield from findings_batches
94
108
 
95
109
 
96
110
  def transform_inspector_findings(
@@ -260,26 +274,24 @@ def cleanup(
260
274
  )
261
275
 
262
276
 
263
- @timeit
264
- def sync(
277
+ def _sync_findings_for_account(
265
278
  neo4j_session: neo4j.Session,
266
279
  boto3_session: boto3.session.Session,
267
- regions: List[str],
268
- current_aws_account_id: str,
280
+ region: str,
281
+ account_id: str,
269
282
  update_tag: int,
270
- common_job_parameters: Dict[str, Any],
283
+ current_aws_account_id: str,
271
284
  ) -> None:
272
- inspector_regions = [
273
- region for region in regions if region in AWS_INSPECTOR_REGIONS
274
- ]
275
-
276
- for region in inspector_regions:
277
- logger.info(
278
- f"Syncing AWS Inspector findings for account {current_aws_account_id} and region {region}",
279
- )
280
- findings = get_inspector_findings(boto3_session, region, current_aws_account_id)
281
- finding_data, package_data = transform_inspector_findings(findings)
282
- logger.info(f"Loading {len(finding_data)} findings")
285
+ """
286
+ Syncs the findings for a given account in a given region.
287
+ """
288
+ findings = get_inspector_findings(boto3_session, region, account_id)
289
+ if not findings:
290
+ logger.info(f"No findings to sync for account {account_id} in region {region}")
291
+ return
292
+ for f_batch in findings:
293
+ finding_data, package_data = transform_inspector_findings(f_batch)
294
+ logger.info(f"Loading {len(finding_data)} findings from account {account_id}")
283
295
  load_inspector_findings(
284
296
  neo4j_session,
285
297
  finding_data,
@@ -295,4 +307,45 @@ def sync(
295
307
  update_tag,
296
308
  current_aws_account_id,
297
309
  )
298
- cleanup(neo4j_session, common_job_parameters)
310
+
311
+
312
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync Inspector findings for every supported region, then run cleanup.

    Regions not listed in ``AWS_INSPECTOR_REGIONS`` are skipped. For each
    remaining region the member accounts plus the current account are
    collected, and ``_sync_findings_for_account`` is scheduled once per account
    through ``to_asynchronous`` / ``to_synchronous``.
    """
    for region in regions:
        if region not in AWS_INSPECTOR_REGIONS:
            continue

        logger.info(
            f"Syncing AWS Inspector findings delegated to account {current_aws_account_id} and region {region}",
        )
        account_ids = get_member_accounts(boto3_session, region)
        # the current host account may not be considered a "member", but we still fetch its findings
        account_ids.append(current_aws_account_id)

        # NOTE(review): every scheduled call shares the same neo4j_session;
        # presumably to_asynchronous runs the function off the main thread —
        # confirm that sharing the session this way is safe.
        async def async_ingest_findings_for_account(account_id: str) -> None:
            await to_asynchronous(
                _sync_findings_for_account,
                neo4j_session,
                boto3_session,
                region,
                account_id,
                update_tag,
                current_aws_account_id,
            )

        pending = [
            async_ingest_findings_for_account(account_id)
            for account_id in account_ids
        ]
        # All coroutines for this region are awaited before the next region
        # starts, so the closure's `region` cannot change underneath them.
        to_synchronous(*pending)

    cleanup(neo4j_session, common_job_parameters)
@@ -263,7 +263,8 @@ def get_rds_snapshot_data(
263
263
  Create an RDS boto3 client and grab all the DBSnapshots.
264
264
  """
265
265
  client = boto3_session.client("rds", region_name=region)
266
- return aws_paginate(client, "describe_db_snapshots", "DBSnapshots")
266
+ snapshots = list(aws_paginate(client, "describe_db_snapshots", "DBSnapshots"))
267
+ return snapshots
267
268
 
268
269
 
269
270
  @timeit
@@ -11,6 +11,7 @@ from . import config
11
11
  from . import dynamodb
12
12
  from . import ecr
13
13
  from . import ecs
14
+ from . import efs
14
15
  from . import eks
15
16
  from . import elasticache
16
17
  from . import elasticsearch
@@ -106,4 +107,5 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
106
107
  "identitycenter": identitycenter.sync_identity_center_instances,
107
108
  "cloudtrail": cloudtrail.sync,
108
109
  "cloudwatch": cloudwatch.sync,
110
+ "efs": efs.sync,
109
111
  }
@@ -4,6 +4,7 @@ import logging
4
4
  import neo4j
5
5
 
6
6
  from cartography.config import Config
7
+ from cartography.intel.entra.applications import sync_entra_applications
7
8
  from cartography.intel.entra.groups import sync_entra_groups
8
9
  from cartography.intel.entra.ou import sync_entra_ous
9
10
  from cartography.intel.entra.users import sync_entra_users
@@ -68,5 +69,15 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
68
69
  common_job_parameters,
69
70
  )
70
71
 
72
+ # Run application sync
73
+ await sync_entra_applications(
74
+ neo4j_session,
75
+ config.entra_tenant_id,
76
+ config.entra_client_id,
77
+ config.entra_client_secret,
78
+ config.update_tag,
79
+ common_job_parameters,
80
+ )
81
+
71
82
  # Execute both syncs in sequence
72
83
  asyncio.run(main())