cartography 0.111.0rc1__py3-none-any.whl → 0.113.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. (The original page links to more details here; the link target was not preserved in this text export.)

Files changed (81)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +57 -0
  3. cartography/config.py +24 -0
  4. cartography/data/indexes.cypher +0 -6
  5. cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
  6. cartography/intel/aws/apigateway.py +128 -17
  7. cartography/intel/aws/apigatewayv2.py +116 -0
  8. cartography/intel/aws/ec2/instances.py +3 -1
  9. cartography/intel/aws/ec2/network_interfaces.py +1 -1
  10. cartography/intel/aws/ec2/vpc_peerings.py +262 -125
  11. cartography/intel/aws/resources.py +2 -0
  12. cartography/intel/azure/__init__.py +35 -32
  13. cartography/intel/azure/subscription.py +2 -2
  14. cartography/intel/azure/tenant.py +39 -30
  15. cartography/intel/azure/util/credentials.py +49 -174
  16. cartography/intel/entra/__init__.py +47 -1
  17. cartography/intel/entra/applications.py +220 -170
  18. cartography/intel/entra/groups.py +41 -22
  19. cartography/intel/entra/ou.py +28 -20
  20. cartography/intel/entra/users.py +24 -18
  21. cartography/intel/gcp/__init__.py +32 -11
  22. cartography/intel/gcp/compute.py +47 -12
  23. cartography/intel/gcp/dns.py +82 -169
  24. cartography/intel/gcp/iam.py +66 -54
  25. cartography/intel/gcp/storage.py +75 -159
  26. cartography/intel/github/repos.py +19 -10
  27. cartography/intel/github/util.py +12 -0
  28. cartography/intel/keycloak/__init__.py +153 -0
  29. cartography/intel/keycloak/authenticationexecutions.py +322 -0
  30. cartography/intel/keycloak/authenticationflows.py +77 -0
  31. cartography/intel/keycloak/clients.py +187 -0
  32. cartography/intel/keycloak/groups.py +126 -0
  33. cartography/intel/keycloak/identityproviders.py +94 -0
  34. cartography/intel/keycloak/organizations.py +163 -0
  35. cartography/intel/keycloak/realms.py +61 -0
  36. cartography/intel/keycloak/roles.py +202 -0
  37. cartography/intel/keycloak/scopes.py +73 -0
  38. cartography/intel/keycloak/users.py +70 -0
  39. cartography/intel/keycloak/util.py +47 -0
  40. cartography/intel/kubernetes/__init__.py +26 -0
  41. cartography/intel/kubernetes/eks.py +402 -0
  42. cartography/intel/kubernetes/rbac.py +133 -0
  43. cartography/models/aws/apigateway/apigatewayintegration.py +79 -0
  44. cartography/models/aws/apigateway/apigatewaymethod.py +74 -0
  45. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  46. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  47. cartography/models/aws/ec2/vpc_peering.py +157 -0
  48. cartography/models/azure/principal.py +44 -0
  49. cartography/models/azure/tenant.py +20 -0
  50. cartography/models/gcp/dns.py +109 -0
  51. cartography/models/gcp/iam.py +3 -0
  52. cartography/models/gcp/storage/__init__.py +0 -0
  53. cartography/models/gcp/storage/bucket.py +119 -0
  54. cartography/models/keycloak/__init__.py +0 -0
  55. cartography/models/keycloak/authenticationexecution.py +160 -0
  56. cartography/models/keycloak/authenticationflow.py +54 -0
  57. cartography/models/keycloak/client.py +177 -0
  58. cartography/models/keycloak/group.py +101 -0
  59. cartography/models/keycloak/identityprovider.py +89 -0
  60. cartography/models/keycloak/organization.py +116 -0
  61. cartography/models/keycloak/organizationdomain.py +73 -0
  62. cartography/models/keycloak/realm.py +173 -0
  63. cartography/models/keycloak/role.py +126 -0
  64. cartography/models/keycloak/scope.py +73 -0
  65. cartography/models/keycloak/user.py +51 -0
  66. cartography/models/kubernetes/clusterrolebindings.py +40 -0
  67. cartography/models/kubernetes/groups.py +107 -0
  68. cartography/models/kubernetes/oidc.py +51 -0
  69. cartography/models/kubernetes/rolebindings.py +40 -0
  70. cartography/models/kubernetes/users.py +105 -0
  71. cartography/sync.py +2 -0
  72. cartography/util.py +10 -0
  73. {cartography-0.111.0rc1.dist-info → cartography-0.113.0.dist-info}/METADATA +9 -5
  74. {cartography-0.111.0rc1.dist-info → cartography-0.113.0.dist-info}/RECORD +78 -41
  75. cartography/data/jobs/cleanup/aws_import_vpc_peering_cleanup.json +0 -45
  76. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  77. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  78. {cartography-0.111.0rc1.dist-info → cartography-0.113.0.dist-info}/WHEEL +0 -0
  79. {cartography-0.111.0rc1.dist-info → cartography-0.113.0.dist-info}/entry_points.txt +0 -0
  80. {cartography-0.111.0rc1.dist-info → cartography-0.113.0.dist-info}/licenses/LICENSE +0 -0
  81. {cartography-0.111.0rc1.dist-info → cartography-0.113.0.dist-info}/top_level.txt +0 -0
@@ -7,28 +7,20 @@ import neo4j
7
7
  from googleapiclient.discovery import HttpError
8
8
  from googleapiclient.discovery import Resource
9
9
 
10
- from cartography.util import run_cleanup_job
10
+ from cartography.client.core.tx import load
11
+ from cartography.graph.job import GraphJob
12
+ from cartography.models.gcp.dns import GCPDNSZoneSchema
13
+ from cartography.models.gcp.dns import GCPRecordSetSchema
11
14
  from cartography.util import timeit
12
15
 
13
16
  logger = logging.getLogger(__name__)
14
17
 
15
18
 
16
19
  @timeit
17
- def get_dns_zones(dns: Resource, project_id: str) -> List[Resource]:
18
- """
19
- Returns a list of DNS zones within the given project.
20
-
21
- :type dns: The GCP DNS resource object
22
- :param dns: The DNS resource object created by googleapiclient.discovery.build()
23
-
24
- :type project_id: str
25
- :param project_id: Current Google Project Id
26
-
27
- :rtype: list
28
- :return: List of DNS zones
29
- """
20
+ def get_dns_zones(dns: Resource, project_id: str) -> List[Dict]:
21
+ """Returns a list of DNS zones within the given project."""
30
22
  try:
31
- zones = []
23
+ zones: List[Dict] = []
32
24
  request = dns.managedZones().list(project=project_id)
33
25
  while request is not None:
34
26
  response = request.execute()
@@ -47,40 +39,22 @@ def get_dns_zones(dns: Resource, project_id: str) -> List[Resource]:
47
39
  ):
48
40
  logger.warning(
49
41
  (
50
- "Could not retrieve DNS zones on project %s due to permissions issues. Code: %s, Message: %s"
42
+ "Could not retrieve DNS zones on project %s due to permissions issues. "
43
+ "Code: %s, Message: %s"
51
44
  ),
52
45
  project_id,
53
46
  err["code"],
54
47
  err["message"],
55
48
  )
56
49
  return []
57
- else:
58
- raise
50
+ raise
59
51
 
60
52
 
61
53
  @timeit
62
- def get_dns_rrs(
63
- dns: Resource,
64
- dns_zones: List[Dict],
65
- project_id: str,
66
- ) -> List[Resource]:
67
- """
68
- Returns a list of DNS Resource Record Sets within the given project.
69
-
70
- :type dns: The GCP DNS resource object
71
- :param dns: The DNS resource object created by googleapiclient.discovery.build()
72
-
73
- :type dns_zones: list
74
- :param dns_zones: List of DNS zones for the project
75
-
76
- :type project_id: str
77
- :param project_id: Current Google Project Id
78
-
79
- :rtype: list
80
- :return: List of Resource Record Sets
81
- """
54
+ def get_dns_rrs(dns: Resource, dns_zones: List[Dict], project_id: str) -> List[Dict]:
55
+ """Returns a list of DNS Resource Record Sets within the given project."""
82
56
  try:
83
- rrs: List[Resource] = []
57
+ rrs: List[Dict] = []
84
58
  for zone in dns_zones:
85
59
  request = dns.resourceRecordSets().list(
86
60
  project=project_id,
@@ -104,16 +78,53 @@ def get_dns_rrs(
104
78
  ):
105
79
  logger.warning(
106
80
  (
107
- "Could not retrieve DNS RRS on project %s due to permissions issues. Code: %s, Message: %s"
81
+ "Could not retrieve DNS RRS on project %s due to permissions issues. "
82
+ "Code: %s, Message: %s"
108
83
  ),
109
84
  project_id,
110
85
  err["code"],
111
86
  err["message"],
112
87
  )
113
88
  return []
114
- else:
115
- raise
116
- raise e
89
+ raise
90
+
91
+
92
+ @timeit
93
+ def transform_dns_zones(dns_zones: List[Dict]) -> List[Dict]:
94
+ """Transform raw DNS zone responses into Neo4j-ready dicts."""
95
+ zones: List[Dict] = []
96
+ for z in dns_zones:
97
+ zones.append(
98
+ {
99
+ "id": z["id"],
100
+ "name": z.get("name"),
101
+ "dns_name": z.get("dnsName"),
102
+ "description": z.get("description"),
103
+ "visibility": z.get("visibility"),
104
+ "kind": z.get("kind"),
105
+ "nameservers": z.get("nameServers"),
106
+ "created_at": z.get("creationTime"),
107
+ }
108
+ )
109
+ return zones
110
+
111
+
112
+ @timeit
113
+ def transform_dns_rrs(dns_rrs: List[Dict]) -> List[Dict]:
114
+ """Transform raw DNS record set responses into Neo4j-ready dicts."""
115
+ records: List[Dict] = []
116
+ for r in dns_rrs:
117
+ records.append(
118
+ {
119
+ "id": r["name"],
120
+ "name": r["name"],
121
+ "type": r.get("type"),
122
+ "ttl": r.get("ttl"),
123
+ "data": r.get("rrdatas"),
124
+ "zone_id": r.get("zone"),
125
+ }
126
+ )
127
+ return records
117
128
 
118
129
 
119
130
  @timeit
@@ -123,102 +134,30 @@ def load_dns_zones(
123
134
  project_id: str,
124
135
  gcp_update_tag: int,
125
136
  ) -> None:
126
- """
127
- Ingest GCP DNS Zones into Neo4j
128
-
129
- :type neo4j_session: Neo4j session object
130
- :param neo4j session: The Neo4j session object
131
-
132
- :type dns_resp: Dict
133
- :param dns_resp: A DNS response object from the GKE API
134
-
135
- :type project_id: str
136
- :param project_id: Current Google Project Id
137
-
138
- :type gcp_update_tag: timestamp
139
- :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
140
-
141
- :rtype: NoneType
142
- :return: Nothing
143
- """
144
-
145
- ingest_records = """
146
- UNWIND $records as record
147
- MERGE(zone:GCPDNSZone{id:record.id})
148
- ON CREATE SET
149
- zone.firstseen = timestamp(),
150
- zone.created_at = record.creationTime
151
- SET
152
- zone.name = record.name,
153
- zone.dns_name = record.dnsName,
154
- zone.description = record.description,
155
- zone.visibility = record.visibility,
156
- zone.kind = record.kind,
157
- zone.nameservers = record.nameServers,
158
- zone.lastupdated = $gcp_update_tag
159
- WITH zone
160
- MATCH (owner:GCPProject{id:$ProjectId})
161
- MERGE (owner)-[r:RESOURCE]->(zone)
162
- ON CREATE SET
163
- r.firstseen = timestamp(),
164
- r.lastupdated = $gcp_update_tag
165
- """
166
- neo4j_session.run(
167
- ingest_records,
168
- records=dns_zones,
169
- ProjectId=project_id,
170
- gcp_update_tag=gcp_update_tag,
137
+ """Ingest GCP DNS Zones into Neo4j."""
138
+ load(
139
+ neo4j_session,
140
+ GCPDNSZoneSchema(),
141
+ dns_zones,
142
+ lastupdated=gcp_update_tag,
143
+ PROJECT_ID=project_id,
171
144
  )
172
145
 
173
146
 
174
147
  @timeit
175
148
  def load_rrs(
176
149
  neo4j_session: neo4j.Session,
177
- dns_rrs: List[Resource],
150
+ dns_rrs: List[Dict],
178
151
  project_id: str,
179
152
  gcp_update_tag: int,
180
153
  ) -> None:
181
- """
182
- Ingest GCP RRS into Neo4j
183
-
184
- :type neo4j_session: Neo4j session object
185
- :param neo4j session: The Neo4j session object
186
-
187
- :type dns_rrs: list
188
- :param dns_rrs: A list of RRS
189
-
190
- :type project_id: str
191
- :param project_id: Current Google Project Id
192
-
193
- :type gcp_update_tag: timestamp
194
- :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
195
-
196
- :rtype: NoneType
197
- :return: Nothing
198
- """
199
-
200
- ingest_records = """
201
- UNWIND $records as record
202
- MERGE(rrs:GCPRecordSet{id:record.name})
203
- ON CREATE SET
204
- rrs.firstseen = timestamp()
205
- SET
206
- rrs.name = record.name,
207
- rrs.type = record.type,
208
- rrs.ttl = record.ttl,
209
- rrs.data = record.rrdatas,
210
- rrs.lastupdated = $gcp_update_tag
211
- WITH rrs, record
212
- MATCH (zone:GCPDNSZone{id:record.zone})
213
- MERGE (zone)-[r:HAS_RECORD]->(rrs)
214
- ON CREATE SET
215
- r.firstseen = timestamp(),
216
- r.lastupdated = $gcp_update_tag
217
- """
218
- neo4j_session.run(
219
- ingest_records,
220
- records=dns_rrs,
221
- gcp_update_tag=gcp_update_tag,
154
+ """Ingest GCP DNS Resource Record Sets into Neo4j."""
155
+ load(
156
+ neo4j_session,
157
+ GCPRecordSetSchema(),
158
+ dns_rrs,
159
+ lastupdated=gcp_update_tag,
160
+ PROJECT_ID=project_id,
222
161
  )
223
162
 
224
163
 
@@ -227,19 +166,14 @@ def cleanup_dns_records(
227
166
  neo4j_session: neo4j.Session,
228
167
  common_job_parameters: Dict,
229
168
  ) -> None:
230
- """
231
- Delete out-of-date GCP DNS Zones and RRS nodes and relationships
232
-
233
- :type neo4j_session: The Neo4j session object
234
- :param neo4j_session: The Neo4j session
235
-
236
- :type common_job_parameters: dict
237
- :param common_job_parameters: Dictionary of other job parameters to pass to Neo4j
238
-
239
- :rtype: NoneType
240
- :return: Nothing
241
- """
242
- run_cleanup_job("gcp_dns_cleanup.json", neo4j_session, common_job_parameters)
169
+ """Delete out-of-date GCP DNS Zones and Record Sets nodes and relationships."""
170
+ # Record sets depend on zones, so clean them up first.
171
+ GraphJob.from_node_schema(GCPRecordSetSchema(), common_job_parameters).run(
172
+ neo4j_session,
173
+ )
174
+ GraphJob.from_node_schema(GCPDNSZoneSchema(), common_job_parameters).run(
175
+ neo4j_session,
176
+ )
243
177
 
244
178
 
245
179
  @timeit
@@ -250,33 +184,12 @@ def sync(
250
184
  gcp_update_tag: int,
251
185
  common_job_parameters: Dict,
252
186
  ) -> None:
253
- """
254
- Get GCP DNS Zones and Resource Record Sets using the DNS resource object, ingest to Neo4j, and clean up old data.
255
-
256
- :type neo4j_session: The Neo4j session object
257
- :param neo4j_session: The Neo4j session
258
-
259
- :type dns: The DNS resource object created by googleapiclient.discovery.build()
260
- :param dns: The GCP DNS resource object
261
-
262
- :type project_id: str
263
- :param project_id: The project ID of the corresponding project
264
-
265
- :type gcp_update_tag: timestamp
266
- :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
267
-
268
- :type common_job_parameters: dict
269
- :param common_job_parameters: Dictionary of other job parameters to pass to Neo4j
270
-
271
- :rtype: NoneType
272
- :return: Nothing
273
- """
187
+ """Get GCP DNS Zones and Record Sets, load them into Neo4j, and clean up old data."""
274
188
  logger.info("Syncing DNS records for project %s.", project_id)
275
- # DNS ZONES
276
- dns_zones = get_dns_zones(dns, project_id)
189
+ dns_zones_resp = get_dns_zones(dns, project_id)
190
+ dns_zones = transform_dns_zones(dns_zones_resp)
277
191
  load_dns_zones(neo4j_session, dns_zones, project_id, gcp_update_tag)
278
- # RECORD SETS
279
- dns_rrs = get_dns_rrs(dns, dns_zones, project_id)
192
+ dns_rrs_resp = get_dns_rrs(dns, dns_zones_resp, project_id)
193
+ dns_rrs = transform_dns_rrs(dns_rrs_resp)
280
194
  load_rrs(neo4j_session, dns_rrs, project_id, gcp_update_tag)
281
- # TODO scope the cleanup to the current project - https://github.com/cartography-cncf/cartography/issues/381
282
195
  cleanup_dns_records(neo4j_session, common_job_parameters)
@@ -90,6 +90,29 @@ def get_gcp_roles(iam_client: Resource, project_id: str) -> List[Dict]:
90
90
  return []
91
91
 
92
92
 
93
+ def transform_gcp_service_accounts(
94
+ raw_accounts: List[Dict[str, Any]],
95
+ project_id: str,
96
+ ) -> List[Dict[str, Any]]:
97
+ """
98
+ Transform raw GCP service accounts into loader-friendly dicts.
99
+ """
100
+ result: List[Dict[str, Any]] = []
101
+ for sa in raw_accounts:
102
+ result.append(
103
+ {
104
+ "id": sa["uniqueId"],
105
+ "email": sa.get("email"),
106
+ "displayName": sa.get("displayName"),
107
+ "oauth2ClientId": sa.get("oauth2ClientId"),
108
+ "uniqueId": sa.get("uniqueId"),
109
+ "disabled": sa.get("disabled", False),
110
+ "projectId": project_id,
111
+ },
112
+ )
113
+ return result
114
+
115
+
93
116
  @timeit
94
117
  def load_gcp_service_accounts(
95
118
  neo4j_session: neo4j.Session,
@@ -99,38 +122,55 @@ def load_gcp_service_accounts(
99
122
  ) -> None:
100
123
  """
101
124
  Load GCP service account data into Neo4j.
102
-
103
- :param neo4j_session: The Neo4j session.
104
- :param service_accounts: A list of service account data to load.
105
- :param project_id: The GCP Project ID associated with the service accounts.
106
- :param gcp_update_tag: The timestamp of the current sync run.
107
125
  """
108
126
  logger.debug(
109
127
  f"Loading {len(service_accounts)} service accounts for project {project_id}"
110
128
  )
111
- transformed_service_accounts = []
112
- for sa in service_accounts:
113
- transformed_sa = {
114
- "id": sa["uniqueId"],
115
- "email": sa.get("email"),
116
- "displayName": sa.get("displayName"),
117
- "oauth2ClientId": sa.get("oauth2ClientId"),
118
- "uniqueId": sa.get("uniqueId"),
119
- "disabled": sa.get("disabled", False),
120
- "projectId": project_id,
121
- }
122
- transformed_service_accounts.append(transformed_sa)
123
129
 
124
130
  load(
125
131
  neo4j_session,
126
132
  GCPServiceAccountSchema(),
127
- transformed_service_accounts,
133
+ service_accounts,
128
134
  lastupdated=gcp_update_tag,
129
135
  projectId=project_id,
130
- additional_labels=["GCPPrincipal"],
131
136
  )
132
137
 
133
138
 
139
+ def transform_gcp_roles(
140
+ raw_roles: List[Dict[str, Any]],
141
+ project_id: str,
142
+ ) -> List[Dict[str, Any]]:
143
+ """
144
+ Transform raw GCP roles into loader-friendly dicts.
145
+ """
146
+ result: List[Dict[str, Any]] = []
147
+ for role in raw_roles:
148
+ role_name = role["name"]
149
+ if role_name.startswith("roles/"):
150
+ role_type = (
151
+ "BASIC"
152
+ if role_name in ["roles/owner", "roles/editor", "roles/viewer"]
153
+ else "PREDEFINED"
154
+ )
155
+ else:
156
+ role_type = "CUSTOM"
157
+
158
+ result.append(
159
+ {
160
+ "id": role_name,
161
+ "name": role_name,
162
+ "title": role.get("title"),
163
+ "description": role.get("description"),
164
+ "deleted": role.get("deleted", False),
165
+ "etag": role.get("etag"),
166
+ "includedPermissions": role.get("includedPermissions", []),
167
+ "roleType": role_type,
168
+ "projectId": project_id,
169
+ },
170
+ )
171
+ return result
172
+
173
+
134
174
  @timeit
135
175
  def load_gcp_roles(
136
176
  neo4j_session: neo4j.Session,
@@ -140,41 +180,13 @@ def load_gcp_roles(
140
180
  ) -> None:
141
181
  """
142
182
  Load GCP role data into Neo4j.
143
-
144
- :param neo4j_session: The Neo4j session.
145
- :param roles: A list of role data to load.
146
- :param project_id: The GCP Project ID associated with the roles.
147
- :param gcp_update_tag: The timestamp of the current sync run.
148
183
  """
149
184
  logger.debug(f"Loading {len(roles)} roles for project {project_id}")
150
- transformed_roles = []
151
- for role in roles:
152
- role_name = role["name"]
153
- if role_name.startswith("roles/"):
154
- if role_name in ["roles/owner", "roles/editor", "roles/viewer"]:
155
- role_type = "BASIC"
156
- else:
157
- role_type = "PREDEFINED"
158
- else:
159
- role_type = "CUSTOM"
160
-
161
- transformed_role = {
162
- "id": role_name,
163
- "name": role_name,
164
- "title": role.get("title"),
165
- "description": role.get("description"),
166
- "deleted": role.get("deleted", False),
167
- "etag": role.get("etag"),
168
- "includedPermissions": role.get("includedPermissions", []),
169
- "roleType": role_type,
170
- "projectId": project_id,
171
- }
172
- transformed_roles.append(transformed_role)
173
185
 
174
186
  load(
175
187
  neo4j_session,
176
188
  GCPRoleSchema(),
177
- transformed_roles,
189
+ roles,
178
190
  lastupdated=gcp_update_tag,
179
191
  projectId=project_id,
180
192
  )
@@ -224,18 +236,18 @@ def sync(
224
236
  """
225
237
  logger.info(f"Syncing GCP IAM for project {project_id}")
226
238
 
227
- # Get and load service accounts
228
- service_accounts = get_gcp_service_accounts(iam_client, project_id)
239
+ service_accounts_raw = get_gcp_service_accounts(iam_client, project_id)
229
240
  logger.info(
230
- f"Found {len(service_accounts)} service accounts in project {project_id}"
241
+ f"Found {len(service_accounts_raw)} service accounts in project {project_id}"
231
242
  )
243
+ service_accounts = transform_gcp_service_accounts(service_accounts_raw, project_id)
232
244
  load_gcp_service_accounts(
233
245
  neo4j_session, service_accounts, project_id, gcp_update_tag
234
246
  )
235
247
 
236
- # Get and load roles
237
- roles = get_gcp_roles(iam_client, project_id)
238
- logger.info(f"Found {len(roles)} roles in project {project_id}")
248
+ roles_raw = get_gcp_roles(iam_client, project_id)
249
+ logger.info(f"Found {len(roles_raw)} roles in project {project_id}")
250
+ roles = transform_gcp_roles(roles_raw, project_id)
239
251
  load_gcp_roles(neo4j_session, roles, project_id, gcp_update_tag)
240
252
 
241
253
  # Run cleanup