cartography 0.104.0rc2__py3-none-any.whl → 0.105.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (44) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +26 -1
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/config.py +8 -0
  6. cartography/data/indexes.cypher +0 -3
  7. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
  8. cartography/graph/cleanupbuilder.py +151 -41
  9. cartography/intel/aws/acm.py +124 -0
  10. cartography/intel/aws/cloudtrail.py +3 -38
  11. cartography/intel/aws/ecr.py +8 -2
  12. cartography/intel/aws/iam.py +1 -1
  13. cartography/intel/aws/lambda_function.py +1 -1
  14. cartography/intel/aws/resources.py +2 -2
  15. cartography/intel/aws/s3.py +195 -4
  16. cartography/intel/aws/secretsmanager.py +19 -5
  17. cartography/intel/aws/sqs.py +36 -90
  18. cartography/intel/entra/__init__.py +11 -0
  19. cartography/intel/entra/groups.py +151 -0
  20. cartography/intel/entra/ou.py +21 -5
  21. cartography/intel/trivy/__init__.py +161 -0
  22. cartography/intel/trivy/scanner.py +363 -0
  23. cartography/models/aws/acm/certificate.py +75 -0
  24. cartography/models/aws/cloudtrail/trail.py +24 -0
  25. cartography/models/aws/s3/notification.py +24 -0
  26. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  27. cartography/models/aws/sqs/__init__.py +0 -0
  28. cartography/models/aws/sqs/queue.py +89 -0
  29. cartography/models/core/nodes.py +15 -2
  30. cartography/models/entra/group.py +91 -0
  31. cartography/models/trivy/__init__.py +0 -0
  32. cartography/models/trivy/findings.py +66 -0
  33. cartography/models/trivy/fix.py +66 -0
  34. cartography/models/trivy/package.py +71 -0
  35. cartography/sync.py +2 -0
  36. {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/METADATA +3 -2
  37. {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/RECORD +42 -30
  38. cartography/intel/aws/efs.py +0 -93
  39. cartography/models/aws/efs/mount_target.py +0 -52
  40. /cartography/models/aws/{efs → acm}/__init__.py +0 -0
  41. {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/WHEEL +0 -0
  42. {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/entry_points.txt +0 -0
  43. {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/licenses/LICENSE +0 -0
  44. {cartography-0.104.0rc2.dist-info → cartography-0.105.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,151 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import neo4j
5
+ from azure.identity import ClientSecretCredential
6
+ from msgraph import GraphServiceClient
7
+ from msgraph.generated.models.directory_object import DirectoryObject
8
+ from msgraph.generated.models.group import Group
9
+
10
+ from cartography.client.core.tx import load
11
+ from cartography.graph.job import GraphJob
12
+ from cartography.intel.entra.users import load_tenant
13
+ from cartography.models.entra.group import EntraGroupSchema
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
async def get_entra_groups(client: GraphServiceClient) -> list[Group]:
    """
    Fetch every group visible to ``client`` from the Microsoft Graph API.

    The first request is issued with ``top=999``; further pages are retrieved
    by following each response's ``odata_next_link`` until a page arrives
    without one.

    :param client: Graph client used to issue the requests.
    :return: The groups accumulated across all pages.
    """
    query = client.groups.GroupsRequestBuilderGetQueryParameters(top=999)
    first_page_config = client.groups.GroupsRequestBuilderGetRequestConfiguration(
        query_parameters=query
    )

    collected: list[Group] = []
    response = await client.groups.get(request_configuration=first_page_config)
    while response:
        # A page may legitimately carry no values; extending by [] is a no-op.
        collected.extend(response.value or [])
        next_link = response.odata_next_link
        if not next_link:
            break
        response = await client.groups.with_url(next_link).get()

    return collected
36
+
37
+
38
@timeit
async def get_group_members(
    client: GraphServiceClient, group_id: str
) -> tuple[list[str], list[str]]:
    """
    Collect the IDs of the users and groups listed as members of a group.

    Every page of the group's ``members`` collection is walked. Only
    ``DirectoryObject`` entries whose ``odata_type`` is
    ``#microsoft.graph.user`` or ``#microsoft.graph.group`` are recorded;
    anything else is ignored.

    :param client: Graph client used to issue the requests.
    :param group_id: ID of the group whose members are listed.
    :return: ``(user_ids, group_ids)`` in the order the API returned them.
    """
    user_kind = "#microsoft.graph.user"
    group_kind = "#microsoft.graph.group"
    ids_by_kind: dict[str, list[str]] = {user_kind: [], group_kind: []}

    members_builder = client.groups.by_group_id(group_id).members
    response = await members_builder.get()
    while response:
        for member in response.value or []:
            if not isinstance(member, DirectoryObject):
                continue
            # Unknown or missing odata types have no bucket and are skipped.
            bucket = ids_by_kind.get(getattr(member, "odata_type", ""))
            if bucket is not None:
                bucket.append(member.id)
        next_link = response.odata_next_link
        if not next_link:
            break
        response = await members_builder.with_url(next_link).get()

    return ids_by_kind[user_kind], ids_by_kind[group_kind]
60
+
61
+
62
def transform_groups(
    groups: list[Group],
    user_member_map: dict[str, list[str]],
    group_member_map: dict[str, list[str]],
) -> list[dict[str, Any]]:
    """
    Flatten group objects into plain dictionaries ready for ingestion.

    Each output dictionary copies a fixed set of attributes from the group and
    adds two membership lists looked up by the group's ``id``.

    :param groups: Group objects to convert.
    :param user_member_map: Maps a group ID to the IDs of its user members.
    :param group_member_map: Maps a group ID to the IDs of its group members.
    :return: One dictionary per input group, in input order. A group missing
        from either map gets an empty list for that membership key.
    """
    # Attributes copied verbatim; the order here is the key order of the output.
    copied_fields = (
        "id",
        "display_name",
        "description",
        "mail",
        "mail_nickname",
        "mail_enabled",
        "security_enabled",
        "group_types",
        "visibility",
        "is_assignable_to_role",
        "created_date_time",
        "deleted_date_time",
    )

    records: list[dict[str, Any]] = []
    for group in groups:
        record: dict[str, Any] = {
            field: getattr(group, field) for field in copied_fields
        }
        record["member_ids"] = user_member_map.get(group.id, [])
        record["member_group_ids"] = group_member_map.get(group.id, [])
        records.append(record)
    return records
88
+
89
+
90
@timeit
def load_groups(
    neo4j_session: neo4j.Session,
    groups: list[dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write transformed Entra groups to the graph using ``EntraGroupSchema``.

    :param neo4j_session: Session the load is executed on.
    :param groups: Group dictionaries to load.
    :param update_tag: Value passed to the loader as ``lastupdated``.
    :param tenant_id: Value passed to the loader as ``TENANT_ID``.
    """
    logger.info(f"Loading {len(groups)} Entra groups")
    group_schema = EntraGroupSchema()
    load(
        neo4j_session,
        group_schema,
        groups,
        lastupdated=update_tag,
        TENANT_ID=tenant_id,
    )
105
+
106
+
107
@timeit
def cleanup_groups(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """
    Run the cleanup job derived from ``EntraGroupSchema``.

    :param neo4j_session: Session the cleanup job is run on.
    :param common_job_parameters: Parameters handed to the generated job.
    """
    cleanup_job = GraphJob.from_node_schema(EntraGroupSchema(), common_job_parameters)
    cleanup_job.run(neo4j_session)
114
+
115
+
116
@timeit
async def sync_entra_groups(
    neo4j_session: neo4j.Session,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync Entra groups and their memberships into the graph.

    Steps: build a Graph client from the client-secret credential, fetch all
    groups, fetch each group's members, transform, then load the tenant and
    the groups and run group cleanup.

    :param neo4j_session: Session used for loading and cleanup.
    :param tenant_id: Entra tenant ID; also used as the tenant node ID.
    :param client_id: Application (client) ID used to authenticate.
    :param client_secret: Client secret used to authenticate.
    :param update_tag: Update tag applied to loaded data.
    :param common_job_parameters: Parameters passed to the cleanup job.
    """
    graph_client = GraphServiceClient(
        ClientSecretCredential(
            tenant_id=tenant_id, client_id=client_id, client_secret=client_secret
        ),
        scopes=["https://graph.microsoft.com/.default"],
    )

    groups = await get_entra_groups(graph_client)

    user_member_map: dict[str, list[str]] = {}
    group_member_map: dict[str, list[str]] = {}
    for group in groups:
        # Best effort: a failed lookup is logged and the group is recorded
        # with empty membership instead of aborting the whole sync.
        try:
            users, subgroups = await get_group_members(graph_client, group.id)
        except Exception as e:
            logger.error(f"Failed to fetch members for group {group.id}: {e}")
            users, subgroups = [], []
        user_member_map[group.id] = users
        group_member_map[group.id] = subgroups

    records = transform_groups(groups, user_member_map, group_member_map)

    load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
    load_groups(neo4j_session, records, update_tag, tenant_id)
    cleanup_groups(neo4j_session, common_job_parameters)
@@ -22,12 +22,28 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
22
22
  Get all OUs from Microsoft Graph API with pagination support
23
23
  """
24
24
  all_units: list[AdministrativeUnit] = []
25
- request = client.directory.administrative_units.request()
26
25
 
27
- while request:
28
- response = await request.get()
29
- all_units.extend(response.value)
30
- request = response.odata_next_link if response.odata_next_link else None
26
+ # Initialize first page request
27
+ current_request = client.directory.administrative_units
28
+
29
+ while current_request:
30
+ try:
31
+ response = await current_request.get()
32
+ if response and response.value:
33
+ all_units.extend(response.value)
34
+
35
+ # Handle next page using OData link
36
+ if response.odata_next_link:
37
+ current_request = client.directory.administrative_units.with_url(
38
+ response.odata_next_link
39
+ )
40
+ else:
41
+ current_request = None
42
+ else:
43
+ current_request = None
44
+ except Exception as e:
45
+ logger.error(f"Failed to retrieve administrative units: {str(e)}")
46
+ current_request = None
31
47
 
32
48
  return all_units
33
49
 
@@ -0,0 +1,161 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import boto3
5
+ from neo4j import Session
6
+
7
+ from cartography.client.aws import list_accounts
8
+ from cartography.client.aws.ecr import get_ecr_images
9
+ from cartography.config import Config
10
+ from cartography.intel.trivy.scanner import cleanup
11
+ from cartography.intel.trivy.scanner import get_json_files_in_s3
12
+ from cartography.intel.trivy.scanner import sync_single_image_from_s3
13
+ from cartography.stats import get_stats_client
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+ stat_handler = get_stats_client("trivy.scanner")
18
+
19
+
20
@timeit
def get_scan_targets(
    neo4j_session: Session,
    account_ids: list[str] | None = None,
) -> set[str]:
    """
    Return the set of ECR image URIs found in the graph.

    :param neo4j_session: Session used to query the graph.
    :param account_ids: Accounts to inspect. When empty or ``None``, the
        accounts returned by ``list_accounts`` are used instead.
    :return: The image URIs across all inspected accounts, de-duplicated.
    """
    target_accounts = account_ids if account_ids else list_accounts(neo4j_session)

    image_uris: set[str] = set()
    for account_id in target_accounts:
        # Each row is a 5-tuple; only its third element (the URI) is kept.
        image_uris.update(
            image_uri
            for _, _, image_uri, _, _ in get_ecr_images(neo4j_session, account_id)
        )
    return image_uris
39
+
40
+
41
+ def _get_intersection(
42
+ images_in_graph: set[str], json_files: set[str], trivy_s3_prefix: str
43
+ ) -> list[tuple[str, str]]:
44
+ """
45
+ Get the intersection of ECR images in the graph and S3 scan results.
46
+
47
+ Args:
48
+ images_in_graph: Set of ECR images in the graph
49
+ json_files: Set of S3 object keys for JSON files
50
+ trivy_s3_prefix: S3 prefix path containing scan results
51
+
52
+ Returns:
53
+ List of tuples (image_uri, s3_object_key)
54
+ """
55
+ intersection = []
56
+ prefix_len = len(trivy_s3_prefix)
57
+ for s3_object_key in json_files:
58
+ # Sample key "123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
59
+ # Sample key "folder/derp/123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
60
+ # Remove the prefix and the .json suffix
61
+ image_uri = s3_object_key[prefix_len:-5]
62
+
63
+ if image_uri in images_in_graph:
64
+ intersection.append((image_uri, s3_object_key))
65
+
66
+ return intersection
67
+
68
+
69
@timeit
def sync_trivy_aws_ecr_from_s3(
    neo4j_session: Session,
    trivy_s3_bucket: str,
    trivy_s3_prefix: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
    boto3_session: boto3.Session,
) -> None:
    """
    Sync Trivy scan results from S3 for AWS ECR images.

    Only images that are both present in the graph and have a matching JSON
    scan result in S3 are processed. Cleanup runs after all of them are synced.

    Args:
        neo4j_session: Neo4j session for database operations
        trivy_s3_bucket: S3 bucket containing scan results
        trivy_s3_prefix: S3 prefix path containing scan results
        update_tag: Update tag for tracking
        common_job_parameters: Common job parameters for cleanup
        boto3_session: boto3 session for S3 operations

    Raises:
        ValueError: If no image in the graph has a matching scan result, so
            that cleanup is never reached with nothing synced.
    """
    logger.info(
        f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
    )

    graph_images: set[str] = get_scan_targets(neo4j_session)
    json_files: set[str] = get_json_files_in_s3(
        trivy_s3_bucket, trivy_s3_prefix, boto3_session
    )
    matches: list[tuple[str, str]] = _get_intersection(
        graph_images, json_files, trivy_s3_prefix
    )

    if not matches:
        logger.error(
            f"Trivy sync was configured, but there are no ECR images with S3 json scan results in bucket "
            f"'{trivy_s3_bucket}' with prefix '{trivy_s3_prefix}'. "
            "Skipping Trivy sync to avoid potential data loss. "
            "Please check the S3 bucket and prefix configuration. We expect the json files in s3 to be named "
            f"`<image_uri>.json` and to be in the same bucket and prefix as the scan results. If the prefix is "
            "a folder, it MUST end with a trailing slash '/'. "
        )
        logger.error(f"JSON files in S3: {json_files}")
        raise ValueError("No ECR images with S3 json scan results found.")

    logger.info(f"Processing {len(matches)} ECR images with S3 scan results")
    for image_uri, s3_object_key in matches:
        sync_single_image_from_s3(
            neo4j_session,
            image_uri,
            update_tag,
            trivy_s3_bucket,
            s3_object_key,
            boto3_session,
        )

    cleanup(neo4j_session, common_job_parameters)
125
+
126
+
127
@timeit
def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
    """
    Start Trivy scan ingestion from S3.

    Does nothing (beyond an info log) when no Trivy S3 bucket is configured.
    Otherwise a default ``boto3.Session`` is created and the ECR sync is run.

    Args:
        neo4j_session: Neo4j session for database operations
        config: Configuration object containing S3 settings. Its
            ``trivy_s3_prefix`` is set to ``""`` in place when it is ``None``.
    """
    bucket = config.trivy_s3_bucket
    if not bucket:
        logger.info("Trivy S3 configuration not provided. Skipping Trivy ingestion.")
        return

    # Default to an empty prefix when none was configured.
    if config.trivy_s3_prefix is None:
        config.trivy_s3_prefix = ""

    sync_trivy_aws_ecr_from_s3(
        neo4j_session,
        bucket,
        config.trivy_s3_prefix,
        config.update_tag,
        {"UPDATE_TAG": config.update_tag},
        boto3.Session(),
    )

    # Support other Trivy resource types here e.g. if Google Cloud has images.