cartography 0.112.0__py3-none-any.whl → 0.114.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (82) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +8 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -31
  5. cartography/intel/aws/apigatewayv2.py +116 -0
  6. cartography/intel/aws/iam.py +741 -492
  7. cartography/intel/aws/organizations.py +7 -8
  8. cartography/intel/aws/permission_relationships.py +4 -16
  9. cartography/intel/aws/resources.py +2 -0
  10. cartography/intel/azure/__init__.py +16 -0
  11. cartography/intel/azure/app_service.py +105 -0
  12. cartography/intel/azure/functions.py +124 -0
  13. cartography/intel/entra/__init__.py +31 -0
  14. cartography/intel/entra/app_role_assignments.py +277 -0
  15. cartography/intel/entra/applications.py +4 -238
  16. cartography/intel/entra/federation/__init__.py +0 -0
  17. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  18. cartography/intel/entra/service_principals.py +217 -0
  19. cartography/intel/gcp/__init__.py +136 -436
  20. cartography/intel/gcp/clients.py +65 -0
  21. cartography/intel/gcp/compute.py +18 -44
  22. cartography/intel/gcp/crm/__init__.py +0 -0
  23. cartography/intel/gcp/crm/folders.py +108 -0
  24. cartography/intel/gcp/crm/orgs.py +65 -0
  25. cartography/intel/gcp/crm/projects.py +109 -0
  26. cartography/intel/gcp/dns.py +82 -169
  27. cartography/intel/gcp/gke.py +72 -113
  28. cartography/intel/gcp/iam.py +66 -54
  29. cartography/intel/gcp/storage.py +75 -159
  30. cartography/intel/github/__init__.py +41 -0
  31. cartography/intel/github/commits.py +423 -0
  32. cartography/intel/github/repos.py +73 -39
  33. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  34. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  35. cartography/models/aws/iam/access_key.py +103 -0
  36. cartography/models/aws/iam/account_role.py +24 -0
  37. cartography/models/aws/iam/federated_principal.py +60 -0
  38. cartography/models/aws/iam/group.py +60 -0
  39. cartography/models/aws/iam/group_membership.py +26 -0
  40. cartography/models/aws/iam/inline_policy.py +78 -0
  41. cartography/models/aws/iam/managed_policy.py +51 -0
  42. cartography/models/aws/iam/policy_statement.py +57 -0
  43. cartography/models/aws/iam/role.py +83 -0
  44. cartography/models/aws/iam/root_principal.py +52 -0
  45. cartography/models/aws/iam/service_principal.py +30 -0
  46. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  47. cartography/models/aws/iam/user.py +54 -0
  48. cartography/models/azure/__init__.py +0 -0
  49. cartography/models/azure/app_service.py +59 -0
  50. cartography/models/azure/function_app.py +59 -0
  51. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  52. cartography/models/entra/service_principal.py +104 -0
  53. cartography/models/gcp/compute/subnet.py +74 -0
  54. cartography/models/gcp/crm/__init__.py +0 -0
  55. cartography/models/gcp/crm/folders.py +98 -0
  56. cartography/models/gcp/crm/organizations.py +21 -0
  57. cartography/models/gcp/crm/projects.py +100 -0
  58. cartography/models/gcp/dns.py +109 -0
  59. cartography/models/gcp/gke.py +69 -0
  60. cartography/models/gcp/iam.py +3 -0
  61. cartography/models/gcp/storage/__init__.py +0 -0
  62. cartography/models/gcp/storage/bucket.py +119 -0
  63. cartography/models/github/commits.py +63 -0
  64. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
  65. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/RECORD +69 -39
  66. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  67. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  68. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  69. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  70. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  71. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  72. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  73. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  74. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  75. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  76. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  77. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  78. cartography/intel/gcp/crm.py +0 -355
  79. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
  80. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
  81. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
  82. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
@@ -1,355 +0,0 @@
1
- # Google Cloud Resource Manager
2
- # https://cloud.google.com/resource-manager/docs/cloud-platform-resource-hierarchy
3
- import logging
4
- from string import Template
5
- from typing import Dict
6
- from typing import List
7
-
8
- import neo4j
9
- from googleapiclient.discovery import HttpError
10
- from googleapiclient.discovery import Resource
11
-
12
- from cartography.util import run_cleanup_job
13
- from cartography.util import timeit
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
-
18
@timeit
def get_gcp_organizations(crm_v1: Resource) -> List[Resource]:
    """
    Return the list of GCP organizations that the crm_v1 resource object has permissions to access.

    All result pages are followed, not just the first one. If the API raises an HttpError on any
    page, a warning is logged and an empty list is returned (earlier pages are discarded).
    :param crm_v1: The Cloud Resource Manager v1 resource object created by `googleapiclient.discovery.build()`.
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :return: List of GCP Organizations. See https://cloud.google.com/resource-manager/reference/rest/v1/organizations.
    """
    try:
        organizations: List[Resource] = []
        req = crm_v1.organizations().search(body={})
        # search_next() returns None once there is no further page token.
        while req is not None:
            res = req.execute()
            organizations.extend(res.get("organizations", []))
            req = crm_v1.organizations().search_next(
                previous_request=req,
                previous_response=res,
            )
        return organizations
    except HttpError as e:
        logger.warning(
            "HttpError occurred in crm.get_gcp_organizations(), returning empty list. Details: %r",
            e,
        )
        return []
37
-
38
-
39
@timeit
def get_gcp_folders(crm_v2: Resource) -> List[Resource]:
    """
    Return the list of GCP folders that the crm_v2 resource object has permissions to access.

    All result pages are followed, not just the first one. If the API raises an HttpError on any
    page, a warning is logged and an empty list is returned (earlier pages are discarded).
    :param crm_v2: The Cloud Resource Manager v2 resource object created by `googleapiclient.discovery.build()`.
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :return: List of GCP folders. See https://cloud.google.com/resource-manager/reference/rest/v2/folders/search.
    """
    try:
        folders: List[Resource] = []
        req = crm_v2.folders().search(body={})
        # search_next() returns None once there is no further page token.
        while req is not None:
            res = req.execute()
            folders.extend(res.get("folders", []))
            req = crm_v2.folders().search_next(
                previous_request=req,
                previous_response=res,
            )
        return folders
    except HttpError as e:
        logger.warning(
            "HttpError occurred in crm.get_gcp_folders(), returning empty list. Details: %r",
            e,
        )
        return []
58
-
59
-
60
@timeit
def get_gcp_projects(crm_v1: Resource) -> List[Resource]:
    """
    Collect every ACTIVE GCP project visible to the crm_v1 resource object, following pagination.

    If the API raises an HttpError on any page, a warning is logged and an empty list is returned
    (projects gathered from earlier pages are discarded).
    :param crm_v1: The Cloud Resource Manager v1 resource object created by `googleapiclient.discovery.build()`.
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :return: List of GCP projects. See https://cloud.google.com/resource-manager/reference/rest/v1/projects/list.
    """
    collected: List[Resource] = []
    try:
        request = crm_v1.projects().list(filter="lifecycleState:ACTIVE")
        while request is not None:
            response = request.execute()
            collected.extend(response.get("projects", []))
            # list_next() yields None when the response carries no next-page token.
            request = crm_v1.projects().list_next(
                previous_request=request,
                previous_response=response,
            )
    except HttpError as e:
        logger.warning(
            "HttpError occurred in crm.get_gcp_projects(), returning empty list. Details: %r",
            e,
        )
        return []
    else:
        return collected
87
-
88
-
89
@timeit
def load_gcp_organizations(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    gcp_update_tag: int,
) -> None:
    """
    Upsert one GCPOrganization node per organization into Neo4j.

    Each node is keyed on the organization's `name`; `displayName` and `lifecycleState` are
    optional and stored as null when absent.
    :param neo4j_session: The Neo4j session
    :param data: List of organizations; output from crm.get_gcp_organizations()
    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
    :return: Nothing
    """
    upsert_org_query = """
    MERGE (org:GCPOrganization{id:$OrgName})
    ON CREATE SET org.firstseen = timestamp()
    SET org.orgname = $OrgName,
    org.displayname = $DisplayName,
    org.lifecyclestate = $LifecycleState,
    org.lastupdated = $gcp_update_tag
    """
    for organization in data:
        params = {
            "OrgName": organization["name"],
            "DisplayName": organization.get("displayName", None),
            "LifecycleState": organization.get("lifecycleState", None),
            "gcp_update_tag": gcp_update_tag,
        }
        neo4j_session.run(upsert_org_query, **params)
118
-
119
-
120
@timeit
def load_gcp_folders(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    gcp_update_tag: int,
) -> None:
    """
    Ingest the GCP folders to Neo4j and attach each one to its parent with a RESOURCE relationship.

    A folder whose `parent` is neither an organization nor a folder is skipped with a warning,
    so it can neither raise on an unbound query nor be ingested with a stale, malformed query.
    :param neo4j_session: The Neo4j session
    :param data: List of folders; output from crm.get_gcp_folders()
    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
    :return: Nothing
    """
    # Shared tail of the query: upsert the folder itself and link it to `parent`.
    upsert_folder_query = """
    MERGE (folder:GCPFolder{id:$FolderName})
    ON CREATE SET folder.firstseen = timestamp()
    SET folder.foldername = $FolderName,
    folder.displayname = $DisplayName,
    folder.lifecyclestate = $LifecycleState,
    folder.lastupdated = $gcp_update_tag
    WITH parent, folder
    MERGE (parent)-[r:RESOURCE]->(folder)
    ON CREATE SET r.firstseen = timestamp()
    SET r.lastupdated = $gcp_update_tag
    """
    for folder in data:
        # Get the correct parent type.
        # Parents of folders can only be GCPOrganizations or other folders, see
        # https://cloud.google.com/resource-manager/docs/cloud-platform-resource-hierarchy
        parent_id = folder["parent"]
        if parent_id.startswith("organizations"):
            # Organizations are only matched here, never created.
            parent_clause = "MATCH (parent:GCPOrganization{id:$ParentId})"
        elif parent_id.startswith("folders"):
            # A parent folder may not have been loaded yet, so create a placeholder node if needed.
            parent_clause = """
            MERGE (parent:GCPFolder{id:$ParentId})
            ON CREATE SET parent.firstseen = timestamp()
            """
        else:
            logger.warning(
                "Skipping GCP folder %s: unsupported parent %r (expected an organization or a folder).",
                folder.get("name"),
                parent_id,
            )
            continue
        neo4j_session.run(
            parent_clause + upsert_folder_query,
            ParentId=parent_id,
            FolderName=folder["name"],
            DisplayName=folder.get("displayName", None),
            LifecycleState=folder.get("lifecycleState", None),
            gcp_update_tag=gcp_update_tag,
        )
164
-
165
-
166
@timeit
def load_gcp_projects(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    gcp_update_tag: int,
) -> None:
    """
    Upsert one GCPProject node per project into Neo4j and link it to its parent when one is given.

    Nodes are keyed on `projectId`. The project's `name` is stored as the display name.
    :param neo4j_session: The Neo4j session
    :param data: List of GCP projects; output from crm.get_gcp_projects()
    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
    :return: Nothing
    """
    upsert_project_query = """
    MERGE (project:GCPProject{id:$ProjectId})
    ON CREATE SET project.firstseen = timestamp()
    SET project.projectid = $ProjectId,
    project.projectnumber = $ProjectNumber,
    project.displayname = $DisplayName,
    project.lifecyclestate = $LifecycleState,
    project.lastupdated = $gcp_update_tag
    """

    for gcp_project in data:
        params = {
            "ProjectId": gcp_project["projectId"],
            "ProjectNumber": gcp_project["projectNumber"],
            "DisplayName": gcp_project.get("name", None),
            "LifecycleState": gcp_project.get("lifecycleState", None),
            "gcp_update_tag": gcp_update_tag,
        }
        neo4j_session.run(upsert_project_query, **params)
        # Projects without a (truthy) parent entry are left unattached.
        if gcp_project.get("parent"):
            _attach_gcp_project_parent(neo4j_session, gcp_project, gcp_update_tag)
200
-
201
-
202
@timeit
def _attach_gcp_project_parent(
    neo4j_session: neo4j.Session,
    project: Dict,
    gcp_update_tag: int,
) -> None:
    """
    Link a project node to its parent organization or folder, following the Resource Hierarchy -
    https://cloud.google.com/resource-manager/docs/cloud-platform-resource-hierarchy

    The parent node is merged (created if missing) and connected with a RESOURCE relationship.
    :param neo4j_session: The Neo4j session
    :param project: A GCP project dict with `projectId` and a `parent` holding `type` and `id`
    :param gcp_update_tag: The timestamp value to set on the relationship
    :raises NotImplementedError: if the parent type is neither "organization" nor "folder"
    :return: Nothing
    """
    labels_by_parent_type = {
        "organization": "GCPOrganization",
        "folder": "GCPFolder",
    }
    parent_label = labels_by_parent_type.get(project["parent"]["type"])
    if parent_label is None:
        raise NotImplementedError(
            "Ingestion of GCP {}s as parent nodes is currently not supported. "
            "Please file an issue at https://github.com/cartography-cncf/cartography/issues.".format(
                project["parent"]["type"],
            ),
        )

    # Node ids follow the "<type>s/<id>" form, e.g. "folders/123".
    parent_id = f"{project['parent']['type']}s/{project['parent']['id']}"

    # Node labels cannot be passed as query parameters, so the label is templated in.
    # safe_substitute leaves the remaining $-placeholders intact for Neo4j to bind.
    ingest_parent_template = Template(
        """
    MATCH (project:GCPProject{id:$ProjectId})

    MERGE (parent:$parent_label{id:$ParentId})
    ON CREATE SET parent.firstseen = timestamp()

    MERGE (parent)-[r:RESOURCE]->(project)
    ON CREATE SET r.firstseen = timestamp()
    SET r.lastupdated = $gcp_update_tag
    """,
    )
    ingest_parent_query = ingest_parent_template.safe_substitute(
        parent_label=parent_label,
    )
    neo4j_session.run(
        ingest_parent_query,
        ParentId=parent_id,
        ProjectId=project["projectId"],
        gcp_update_tag=gcp_update_tag,
    )
242
-
243
-
244
@timeit
def cleanup_gcp_organizations(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict,
) -> None:
    """
    Run the cleanup job that removes stale GCP organizations and their relationships.
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: Parameters to carry to the cleanup job
    :return: Nothing
    """
    cleanup_job_file = "gcp_crm_organization_cleanup.json"
    run_cleanup_job(cleanup_job_file, neo4j_session, common_job_parameters)
260
-
261
-
262
@timeit
def cleanup_gcp_folders(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict,
) -> None:
    """
    Run the cleanup job that removes stale GCP folders and their relationships.
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: Parameters to carry to the cleanup job
    :return: Nothing
    """
    cleanup_job_file = "gcp_crm_folder_cleanup.json"
    run_cleanup_job(cleanup_job_file, neo4j_session, common_job_parameters)
274
-
275
-
276
@timeit
def cleanup_gcp_projects(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict,
) -> None:
    """
    Run the cleanup job that removes stale GCP projects and their relationships.
    :param neo4j_session: The Neo4j session
    :param common_job_parameters: Parameters to carry to the cleanup job
    :return: Nothing
    """
    cleanup_job_file = "gcp_crm_project_cleanup.json"
    run_cleanup_job(cleanup_job_file, neo4j_session, common_job_parameters)
292
-
293
-
294
@timeit
def sync_gcp_organizations(
    neo4j_session: neo4j.Session,
    crm_v1: Resource,
    gcp_update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Fetch GCP organizations with the CRM v1 resource object, load them to Neo4j, then clean up stale nodes.
    :param neo4j_session: The Neo4j session
    :param crm_v1: The Cloud Resource Manager v1 resource object created by `googleapiclient.discovery.build()`.
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
    :param common_job_parameters: Parameters to carry to the Neo4j jobs
    :return: Nothing
    """
    logger.debug("Syncing GCP organizations")
    organizations = get_gcp_organizations(crm_v1)
    load_gcp_organizations(
        neo4j_session,
        organizations,
        gcp_update_tag,
    )
    cleanup_gcp_organizations(
        neo4j_session,
        common_job_parameters,
    )
314
-
315
-
316
@timeit
def sync_gcp_folders(
    neo4j_session: neo4j.Session,
    crm_v2: Resource,
    gcp_update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Fetch GCP folders with the CRM v2 resource object, load them to Neo4j, then clean up stale nodes.
    :param neo4j_session: The Neo4j session
    :param crm_v2: The Cloud Resource Manager v2 resource object created by `googleapiclient.discovery.build()`.
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
    :param common_job_parameters: Parameters to carry to the Neo4j jobs
    :return: Nothing
    """
    logger.debug("Syncing GCP folders")
    folder_data = get_gcp_folders(crm_v2)
    load_gcp_folders(
        neo4j_session,
        folder_data,
        gcp_update_tag,
    )
    cleanup_gcp_folders(
        neo4j_session,
        common_job_parameters,
    )
336
-
337
-
338
@timeit
def sync_gcp_projects(
    neo4j_session: neo4j.Session,
    projects: List[Dict],
    gcp_update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Load an already-fetched list of GCP projects to Neo4j, then clean up stale nodes.

    Unlike the organization and folder syncs, this function does not call the API itself;
    the caller supplies the project list.
    :param neo4j_session: The Neo4j session
    :param projects: List of GCP projects; output from crm.get_gcp_projects()
    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
    :param common_job_parameters: Parameters to carry to the Neo4j jobs
    :return: Nothing
    """
    logger.debug("Syncing GCP projects")
    load_gcp_projects(
        neo4j_session,
        projects,
        gcp_update_tag,
    )
    cleanup_gcp_projects(
        neo4j_session,
        common_job_parameters,
    )