cartography 0.112.0__py3-none-any.whl → 0.114.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (82) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +8 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -31
  5. cartography/intel/aws/apigatewayv2.py +116 -0
  6. cartography/intel/aws/iam.py +741 -492
  7. cartography/intel/aws/organizations.py +7 -8
  8. cartography/intel/aws/permission_relationships.py +4 -16
  9. cartography/intel/aws/resources.py +2 -0
  10. cartography/intel/azure/__init__.py +16 -0
  11. cartography/intel/azure/app_service.py +105 -0
  12. cartography/intel/azure/functions.py +124 -0
  13. cartography/intel/entra/__init__.py +31 -0
  14. cartography/intel/entra/app_role_assignments.py +277 -0
  15. cartography/intel/entra/applications.py +4 -238
  16. cartography/intel/entra/federation/__init__.py +0 -0
  17. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  18. cartography/intel/entra/service_principals.py +217 -0
  19. cartography/intel/gcp/__init__.py +136 -436
  20. cartography/intel/gcp/clients.py +65 -0
  21. cartography/intel/gcp/compute.py +18 -44
  22. cartography/intel/gcp/crm/__init__.py +0 -0
  23. cartography/intel/gcp/crm/folders.py +108 -0
  24. cartography/intel/gcp/crm/orgs.py +65 -0
  25. cartography/intel/gcp/crm/projects.py +109 -0
  26. cartography/intel/gcp/dns.py +82 -169
  27. cartography/intel/gcp/gke.py +72 -113
  28. cartography/intel/gcp/iam.py +66 -54
  29. cartography/intel/gcp/storage.py +75 -159
  30. cartography/intel/github/__init__.py +41 -0
  31. cartography/intel/github/commits.py +423 -0
  32. cartography/intel/github/repos.py +73 -39
  33. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  34. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  35. cartography/models/aws/iam/access_key.py +103 -0
  36. cartography/models/aws/iam/account_role.py +24 -0
  37. cartography/models/aws/iam/federated_principal.py +60 -0
  38. cartography/models/aws/iam/group.py +60 -0
  39. cartography/models/aws/iam/group_membership.py +26 -0
  40. cartography/models/aws/iam/inline_policy.py +78 -0
  41. cartography/models/aws/iam/managed_policy.py +51 -0
  42. cartography/models/aws/iam/policy_statement.py +57 -0
  43. cartography/models/aws/iam/role.py +83 -0
  44. cartography/models/aws/iam/root_principal.py +52 -0
  45. cartography/models/aws/iam/service_principal.py +30 -0
  46. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  47. cartography/models/aws/iam/user.py +54 -0
  48. cartography/models/azure/__init__.py +0 -0
  49. cartography/models/azure/app_service.py +59 -0
  50. cartography/models/azure/function_app.py +59 -0
  51. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  52. cartography/models/entra/service_principal.py +104 -0
  53. cartography/models/gcp/compute/subnet.py +74 -0
  54. cartography/models/gcp/crm/__init__.py +0 -0
  55. cartography/models/gcp/crm/folders.py +98 -0
  56. cartography/models/gcp/crm/organizations.py +21 -0
  57. cartography/models/gcp/crm/projects.py +100 -0
  58. cartography/models/gcp/dns.py +109 -0
  59. cartography/models/gcp/gke.py +69 -0
  60. cartography/models/gcp/iam.py +3 -0
  61. cartography/models/gcp/storage/__init__.py +0 -0
  62. cartography/models/gcp/storage/bucket.py +119 -0
  63. cartography/models/github/commits.py +63 -0
  64. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
  65. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/RECORD +69 -39
  66. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  67. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  68. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  69. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  70. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  71. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  72. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  73. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  74. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  75. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  76. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  77. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  78. cartography/intel/gcp/crm.py +0 -355
  79. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
  80. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
  81. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
  82. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,65 @@
1
+ import logging
2
+ from typing import Optional
3
+
4
+ import googleapiclient.discovery
5
+ import httplib2
6
+ from google.auth import default
7
+ from google.auth.credentials import Credentials as GoogleCredentials
8
+ from google.auth.exceptions import DefaultCredentialsError
9
+ from google_auth_httplib2 import AuthorizedHttp
10
+ from googleapiclient.discovery import Resource
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
# Default HTTP timeout (seconds) for Google API clients built via discovery.build.
# Used as the default `timeout` of _authorized_http_with_timeout below.
_GCP_HTTP_TIMEOUT = 120
16
+
17
+
18
def _authorized_http_with_timeout(
    credentials: GoogleCredentials,
    timeout: int = _GCP_HTTP_TIMEOUT,
) -> AuthorizedHttp:
    """
    Wrap ``credentials`` in an AuthorizedHttp whose httplib2 transport carries its own timeout.

    The timeout is configured on the httplib2.Http instance rather than through a global socket timeout.

    :param credentials: Google credentials used to authorize outgoing requests
    :param timeout: Timeout in seconds handed to httplib2.Http; defaults to _GCP_HTTP_TIMEOUT
    :return: AuthorizedHttp bound to the credentials and the timeout-configured transport
    """
    transport = httplib2.Http(timeout=timeout)
    return AuthorizedHttp(credentials, http=transport)
26
+
27
+
28
def build_client(service: str, version: str = "v1") -> Resource:
    """
    Build a Google API discovery client for ``service`` using the credentials from get_gcp_credentials().

    :param service: Name of the Google API service to build a client for
    :param version: API version string; defaults to "v1"
    :return: The Resource produced by googleapiclient.discovery.build
    :raises RuntimeError: If get_gcp_credentials() returns None
    """
    creds = get_gcp_credentials()
    if creds is None:
        raise RuntimeError("GCP credentials are not available; cannot build client.")
    # Requests go through a transport with an explicit timeout; discovery caching is turned off.
    authorized_http = _authorized_http_with_timeout(creds)
    return googleapiclient.discovery.build(
        service,
        version,
        http=authorized_http,
        cache_discovery=False,
    )
39
+
40
+
41
def get_gcp_credentials() -> Optional[GoogleCredentials]:
    """
    Load Application Default Credentials restricted to the cloud-platform scope.

    :return: The credentials object returned by google.auth.default(), or None when a
             DefaultCredentialsError is raised (the failure is logged, not propagated)
    """
    cloud_platform_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
    try:
        # default() returns a (credentials, project) pair; only the credentials are needed here.
        adc_credentials, _project = default(scopes=cloud_platform_scopes)
    except DefaultCredentialsError as err:
        # Full traceback only at debug level; the error-level message stays a single readable line.
        logger.debug(
            "Error occurred calling google.auth.default().",
            exc_info=True,
        )
        logger.error(
            (
                "Unable to initialize Google Compute Platform creds. If you don't have GCP data or don't want to load "
                "GCP data then you can ignore this message. Otherwise, the error code is: %s "
                "Make sure your GCP credentials are configured correctly, your credentials file (if any) is valid, and "
                "that the identity you are authenticating to has the securityReviewer role attached."
            ),
            err,
        )
        return None
    return adc_credentials
@@ -656,51 +656,25 @@ def load_gcp_subnets(
656
656
  neo4j_session: neo4j.Session,
657
657
  subnets: List[Dict],
658
658
  gcp_update_tag: int,
659
+ project_id: str,
659
660
  ) -> None:
660
661
  """
661
- Ingest GCP subnet data to Neo4j
662
+ Ingest GCP subnet data to Neo4j using the data model
662
663
  :param neo4j_session: The Neo4j session
663
664
  :param subnets: List of the subnets
664
665
  :param gcp_update_tag: The timestamp to set these Neo4j nodes with
666
+ :param project_id: The project ID
665
667
  :return: Nothing
666
668
  """
667
- query = """
668
- MERGE(vpc:GCPVpc{id:$VpcPartialUri})
669
- ON CREATE SET vpc.firstseen = timestamp(),
670
- vpc.partial_uri = $VpcPartialUri
669
+ from cartography.models.gcp.compute.subnet import GCPSubnetSchema
671
670
 
672
- MERGE(subnet:GCPSubnet{id:$PartialUri})
673
- ON CREATE SET subnet.firstseen = timestamp(),
674
- subnet.partial_uri = $PartialUri
675
- SET subnet.self_link = $SubnetSelfLink,
676
- subnet.project_id = $ProjectId,
677
- subnet.name = $SubnetName,
678
- subnet.region = $Region,
679
- subnet.gateway_address = $GatewayAddress,
680
- subnet.ip_cidr_range = $IpCidrRange,
681
- subnet.private_ip_google_access = $PrivateIpGoogleAccess,
682
- subnet.vpc_partial_uri = $VpcPartialUri,
683
- subnet.lastupdated = $gcp_update_tag
684
-
685
- MERGE (vpc)-[r:RESOURCE]->(subnet)
686
- ON CREATE SET r.firstseen = timestamp()
687
- SET r.lastupdated = $gcp_update_tag
688
- """
689
- for s in subnets:
690
- neo4j_session.run(
691
- query,
692
- VpcPartialUri=s["vpc_partial_uri"],
693
- VpcSelfLink=s["vpc_self_link"],
694
- PartialUri=s["partial_uri"],
695
- SubnetSelfLink=s["self_link"],
696
- ProjectId=s["project_id"],
697
- SubnetName=s["name"],
698
- Region=s["region"],
699
- GatewayAddress=s["gateway_address"],
700
- IpCidrRange=s["ip_cidr_range"],
701
- PrivateIpGoogleAccess=s["private_ip_google_access"],
702
- gcp_update_tag=gcp_update_tag,
703
- )
671
+ load(
672
+ neo4j_session,
673
+ GCPSubnetSchema(),
674
+ subnets,
675
+ lastupdated=gcp_update_tag,
676
+ PROJECT_ID=project_id,
677
+ )
704
678
 
705
679
 
706
680
  @timeit
@@ -981,7 +955,7 @@ def _attach_gcp_vpc(
981
955
  """
982
956
  query = """
983
957
  MATCH (i:GCPInstance{id:$InstanceId})-[:NETWORK_INTERFACE]->(nic:GCPNetworkInterface)
984
- -[p:PART_OF_SUBNET]->(sn:GCPSubnet)<-[r:RESOURCE]-(vpc:GCPVpc)
958
+ -[p:PART_OF_SUBNET]->(sn:GCPSubnet)<-[r:HAS]-(vpc:GCPVpc)
985
959
  MERGE (i)-[m:MEMBER_OF_GCP_VPC]->(vpc)
986
960
  ON CREATE SET m.firstseen = timestamp()
987
961
  SET m.lastupdated = $gcp_update_tag
@@ -1185,15 +1159,15 @@ def cleanup_gcp_subnets(
1185
1159
  common_job_parameters: Dict,
1186
1160
  ) -> None:
1187
1161
  """
1188
- Delete out-of-date GCP VPC subnet nodes and relationships
1162
+ Delete out-of-date GCP VPC subnet nodes and relationships using data model
1189
1163
  :param neo4j_session: The Neo4j session
1190
1164
  :param common_job_parameters: dict of other job parameters to pass to Neo4j
1191
1165
  :return: Nothing
1192
1166
  """
1193
- run_cleanup_job(
1194
- "gcp_compute_vpc_subnet_cleanup.json",
1195
- neo4j_session,
1196
- common_job_parameters,
1167
+ from cartography.models.gcp.compute.subnet import GCPSubnetSchema
1168
+
1169
+ GraphJob.from_node_schema(GCPSubnetSchema(), common_job_parameters).run(
1170
+ neo4j_session
1197
1171
  )
1198
1172
 
1199
1173
 
@@ -1296,7 +1270,7 @@ def sync_gcp_subnets(
1296
1270
  for r in regions:
1297
1271
  subnet_res = get_gcp_subnets(project_id, r, compute)
1298
1272
  subnets = transform_gcp_subnets(subnet_res)
1299
- load_gcp_subnets(neo4j_session, subnets, gcp_update_tag)
1273
+ load_gcp_subnets(neo4j_session, subnets, gcp_update_tag, project_id)
1300
1274
  # TODO scope the cleanup to the current project - https://github.com/cartography-cncf/cartography/issues/381
1301
1275
  cleanup_gcp_subnets(neo4j_session, common_job_parameters)
1302
1276
 
File without changes
@@ -0,0 +1,108 @@
1
+ import logging
2
+ from typing import Dict
3
+ from typing import List
4
+
5
+ import neo4j
6
+ from google.cloud import resourcemanager_v3
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.models.gcp.crm.folders import GCPFolderSchema
10
+ from cartography.util import timeit
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
@timeit
def get_gcp_folders(org_resource_name: str) -> List[Dict]:
    """
    Return every descendant GCP folder under the given organization by walking the folder tree breadth-first.

    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
    :return: List of folder dicts with keys 'name' (full resource name, e.g. "folders/123456"),
             'parent' (the resource name the folder was listed under), 'displayName' and 'lifecycleState'
    """
    # Function-scope import: deque gives O(1) pops from the left, unlike list.pop(0).
    from collections import deque

    results: List[Dict] = []
    client = resourcemanager_v3.FoldersClient()
    # BFS over folders starting at the org root
    queue: deque[str] = deque([org_resource_name])
    seen: set[str] = set()
    while queue:
        parent = queue.popleft()
        # Never list the same parent twice.
        if parent in seen:
            continue
        seen.add(parent)

        for folder in client.list_folders(parent=parent):
            results.append(
                {
                    "name": folder.name,
                    "parent": parent,
                    "displayName": folder.display_name,
                    "lifecycleState": folder.state.name,
                }
            )
            # Only folders with a resource name can be used as the next parent to list.
            if folder.name:
                queue.append(folder.name)
    return results
46
+
47
+
48
@timeit
def transform_gcp_folders(data: List[Dict]) -> List[Dict]:
    """
    Annotate each folder dict in place with ``parent_org`` and ``parent_folder`` keys.

    Both keys start as None. The one matching the prefix of the folder's ``parent`` value
    ("organizations" or "folders") is then set to that value. If neither prefix matches,
    both stay None and a warning is logged.

    :param data: List of folder dicts, each carrying 'name' and 'parent' keys
    :return: The same list, with every dict updated
    """
    for folder in data:
        folder.update(parent_org=None, parent_folder=None)

        parent = folder["parent"]
        if parent.startswith("organizations"):
            folder["parent_org"] = parent
            continue
        if parent.startswith("folders"):
            folder["parent_folder"] = parent
            continue

        logger.warning(
            f"Folder {folder['name']} has unexpected parent type: {folder['parent']}"
        )

    return data
70
+
71
+
72
@timeit
def load_gcp_folders(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    gcp_update_tag: int,
    org_resource_name: str,
) -> None:
    """
    Run the folder dicts through transform_gcp_folders and load them with GCPFolderSchema.

    :param neo4j_session: The Neo4j session
    :param data: List of folder dicts as returned by get_gcp_folders
    :param gcp_update_tag: Value passed to load() as ``lastupdated``
    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
    :return: Nothing
    """
    folders_with_parents = transform_gcp_folders(data)
    load_kwargs = {
        "lastupdated": gcp_update_tag,
        "ORG_RESOURCE_NAME": org_resource_name,
    }
    load(neo4j_session, GCPFolderSchema(), folders_with_parents, **load_kwargs)
91
+
92
+
93
@timeit
def sync_gcp_folders(
    neo4j_session: neo4j.Session,
    gcp_update_tag: int,
    common_job_parameters: Dict,
    org_resource_name: str,
) -> List[Dict]:
    """
    Fetch the folders under an organization with get_gcp_folders and load them to Neo4j.

    :param neo4j_session: The Neo4j session
    :param gcp_update_tag: Value passed through to load_gcp_folders
    :param common_job_parameters: Accepted but not used by this function
    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
    :return: List of folders synced
    """
    logger.debug("Syncing GCP folders")
    fetched_folders = get_gcp_folders(org_resource_name)
    load_gcp_folders(
        neo4j_session,
        fetched_folders,
        gcp_update_tag,
        org_resource_name,
    )
    return fetched_folders
@@ -0,0 +1,65 @@
1
+ import logging
2
+ from typing import Dict
3
+ from typing import List
4
+
5
+ import neo4j
6
+ from google.cloud import resourcemanager_v3
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.models.gcp.crm.organizations import GCPOrganizationSchema
10
+ from cartography.util import timeit
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
@timeit
def get_gcp_organizations() -> List[Dict]:
    """
    Return the GCP organizations yielded by OrganizationsClient.search_organizations().

    Exceptions raised by the client are not caught here; they propagate to the caller.

    :return: List of org dicts with keys: name, displayName, lifecycleState.
    """
    organizations_client = resourcemanager_v3.OrganizationsClient()
    return [
        {
            "name": org.name,
            "displayName": org.display_name,
            "lifecycleState": org.state.name,
        }
        for org in organizations_client.search_organizations()
    ]
33
+
34
+
35
@timeit
def load_gcp_organizations(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    gcp_update_tag: int,
) -> None:
    """
    Load organization dicts into the graph with GCPOrganizationSchema.

    Each dict is modified in place: its 'name' value is copied to an 'id' key before loading.

    :param neo4j_session: The Neo4j session
    :param data: List of org dicts, each carrying a 'name' key
    :param gcp_update_tag: Value passed to load() as ``lastupdated``
    :return: Nothing
    """
    for organization in data:
        organization.update(id=organization["name"])

    schema = GCPOrganizationSchema()
    load(neo4j_session, schema, data, lastupdated=gcp_update_tag)
50
+
51
+
52
@timeit
def sync_gcp_organizations(
    neo4j_session: neo4j.Session,
    gcp_update_tag: int,
    common_job_parameters: Dict,
) -> List[Dict]:
    """
    Fetch GCP organizations with get_gcp_organizations and load them to Neo4j.

    :param neo4j_session: The Neo4j session
    :param gcp_update_tag: Value passed through to load_gcp_organizations
    :param common_job_parameters: Accepted but not used by this function
    :return: The list of organizations synced
    """
    logger.debug("Syncing GCP organizations")
    organizations = get_gcp_organizations()
    load_gcp_organizations(
        neo4j_session,
        organizations,
        gcp_update_tag,
    )
    return organizations
@@ -0,0 +1,109 @@
1
+ import logging
2
+ from typing import Dict
3
+ from typing import List
4
+
5
+ import neo4j
6
+ from google.cloud import resourcemanager_v3
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.models.gcp.crm.projects import GCPProjectSchema
10
+ from cartography.util import timeit
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
@timeit
def get_gcp_projects(org_resource_name: str, folders: List[Dict]) -> List[Dict]:
    """
    Return the GCP projects listed directly under the organization and under each given folder.

    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
    :param folders: List of folder dictionaries containing 'name' field with full resource names;
                    may be empty or None
    :return: List of project dicts with keys: projectId, projectNumber, name, lifecycleState, parent
    """
    # One client serves every list_projects call; there is no need to rebuild it per parent.
    client = resourcemanager_v3.ProjectsClient()
    # Parents to query: the org first, then every folder. dict.fromkeys de-duplicates while keeping
    # first-seen order, so the query order is stable from run to run.
    folder_names = [folder["name"] for folder in folders] if folders else []
    parents = dict.fromkeys([org_resource_name, *folder_names])

    results: List[Dict] = []
    for parent in parents:
        for proj in client.list_projects(parent=parent):
            # NOTE(review): the original author relies on list_projects returning only ACTIVE
            # projects by default — confirm against the Resource Manager API docs.
            name_field = proj.name  # "projects/<number>"
            project_number = name_field.split("/")[-1] if name_field else None
            results.append(
                {
                    "projectId": getattr(proj, "project_id", None),
                    "projectNumber": project_number,
                    "name": getattr(proj, "display_name", None),
                    "lifecycleState": proj.state.name,
                    "parent": proj.parent,
                }
            )
    return results
44
+
45
+
46
@timeit
def transform_gcp_projects(data: List[Dict]) -> List[Dict]:
    """
    Annotate each project dict in place with ``parent_org`` and ``parent_folder`` keys.

    Both keys start as None. The one matching the prefix of the project's ``parent`` value
    ("organizations" or "folders") is then set to that value. If neither prefix matches,
    both stay None and a warning is logged.

    :param data: List of project dicts, each carrying 'projectId' and 'parent' keys
    :return: The same list, with every dict updated
    """
    for project in data:
        project.update(parent_org=None, parent_folder=None)

        # Route the parent resource name to the key that matches its type.
        parent = project["parent"]
        if parent.startswith("organizations"):
            project["parent_org"] = parent
            continue
        if parent.startswith("folders"):
            project["parent_folder"] = parent
            continue

        logger.warning(
            f"Project {project['projectId']} has unexpected parent type: {project['parent']}"
        )

    return data
69
+
70
+
71
@timeit
def load_gcp_projects(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    gcp_update_tag: int,
    org_resource_name: str,
) -> None:
    """
    Run the project dicts through transform_gcp_projects and load them with GCPProjectSchema.

    :param neo4j_session: The Neo4j session
    :param data: List of project dicts as returned by get_gcp_projects
    :param gcp_update_tag: Value passed to load() as ``lastupdated``
    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
    :return: Nothing
    """
    projects_with_parents = transform_gcp_projects(data)
    load_kwargs = {
        "lastupdated": gcp_update_tag,
        "ORG_RESOURCE_NAME": org_resource_name,
    }
    load(neo4j_session, GCPProjectSchema(), projects_with_parents, **load_kwargs)
90
+
91
+
92
@timeit
def sync_gcp_projects(
    neo4j_session: neo4j.Session,
    org_resource_name: str,
    folders: List[Dict],
    gcp_update_tag: int,
    common_job_parameters: Dict,
) -> List[Dict]:
    """
    Fetch the projects under an organization and its folders, then load them to Neo4j.

    :param neo4j_session: The Neo4j session
    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
    :param folders: List of folder dictionaries containing 'name' field with full resource names
    :param gcp_update_tag: Value passed through to load_gcp_projects
    :param common_job_parameters: Accepted but not used by this function
    :return: List of projects synced
    """
    logger.debug("Syncing GCP projects")
    fetched_projects = get_gcp_projects(org_resource_name, folders)
    load_gcp_projects(
        neo4j_session,
        fetched_projects,
        gcp_update_tag,
        org_resource_name,
    )
    return fetched_projects