cartography 0.112.0__py3-none-any.whl → 0.114.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (82)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +8 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -31
  5. cartography/intel/aws/apigatewayv2.py +116 -0
  6. cartography/intel/aws/iam.py +741 -492
  7. cartography/intel/aws/organizations.py +7 -8
  8. cartography/intel/aws/permission_relationships.py +4 -16
  9. cartography/intel/aws/resources.py +2 -0
  10. cartography/intel/azure/__init__.py +16 -0
  11. cartography/intel/azure/app_service.py +105 -0
  12. cartography/intel/azure/functions.py +124 -0
  13. cartography/intel/entra/__init__.py +31 -0
  14. cartography/intel/entra/app_role_assignments.py +277 -0
  15. cartography/intel/entra/applications.py +4 -238
  16. cartography/intel/entra/federation/__init__.py +0 -0
  17. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  18. cartography/intel/entra/service_principals.py +217 -0
  19. cartography/intel/gcp/__init__.py +136 -436
  20. cartography/intel/gcp/clients.py +65 -0
  21. cartography/intel/gcp/compute.py +18 -44
  22. cartography/intel/gcp/crm/__init__.py +0 -0
  23. cartography/intel/gcp/crm/folders.py +108 -0
  24. cartography/intel/gcp/crm/orgs.py +65 -0
  25. cartography/intel/gcp/crm/projects.py +109 -0
  26. cartography/intel/gcp/dns.py +82 -169
  27. cartography/intel/gcp/gke.py +72 -113
  28. cartography/intel/gcp/iam.py +66 -54
  29. cartography/intel/gcp/storage.py +75 -159
  30. cartography/intel/github/__init__.py +41 -0
  31. cartography/intel/github/commits.py +423 -0
  32. cartography/intel/github/repos.py +73 -39
  33. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  34. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  35. cartography/models/aws/iam/access_key.py +103 -0
  36. cartography/models/aws/iam/account_role.py +24 -0
  37. cartography/models/aws/iam/federated_principal.py +60 -0
  38. cartography/models/aws/iam/group.py +60 -0
  39. cartography/models/aws/iam/group_membership.py +26 -0
  40. cartography/models/aws/iam/inline_policy.py +78 -0
  41. cartography/models/aws/iam/managed_policy.py +51 -0
  42. cartography/models/aws/iam/policy_statement.py +57 -0
  43. cartography/models/aws/iam/role.py +83 -0
  44. cartography/models/aws/iam/root_principal.py +52 -0
  45. cartography/models/aws/iam/service_principal.py +30 -0
  46. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  47. cartography/models/aws/iam/user.py +54 -0
  48. cartography/models/azure/__init__.py +0 -0
  49. cartography/models/azure/app_service.py +59 -0
  50. cartography/models/azure/function_app.py +59 -0
  51. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  52. cartography/models/entra/service_principal.py +104 -0
  53. cartography/models/gcp/compute/subnet.py +74 -0
  54. cartography/models/gcp/crm/__init__.py +0 -0
  55. cartography/models/gcp/crm/folders.py +98 -0
  56. cartography/models/gcp/crm/organizations.py +21 -0
  57. cartography/models/gcp/crm/projects.py +100 -0
  58. cartography/models/gcp/dns.py +109 -0
  59. cartography/models/gcp/gke.py +69 -0
  60. cartography/models/gcp/iam.py +3 -0
  61. cartography/models/gcp/storage/__init__.py +0 -0
  62. cartography/models/gcp/storage/bucket.py +119 -0
  63. cartography/models/github/commits.py +63 -0
  64. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
  65. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/RECORD +69 -39
  66. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  67. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  68. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  69. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  70. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  71. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  72. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  73. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  74. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  75. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  76. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  77. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  78. cartography/intel/gcp/crm.py +0 -355
  79. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
  80. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
  81. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
  82. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  import enum
2
2
  import json
3
3
  import logging
4
+ from collections import namedtuple
4
5
  from typing import Any
5
6
  from typing import Dict
6
7
  from typing import List
@@ -9,11 +10,26 @@ from typing import Tuple
9
10
  import boto3
10
11
  import neo4j
11
12
 
12
- from cartography.intel.aws.permission_relationships import parse_statement_node
13
+ from cartography.client.core.tx import load
14
+ from cartography.client.core.tx import load_matchlinks
15
+ from cartography.client.core.tx import read_list_of_dicts_tx
16
+ from cartography.client.core.tx import read_list_of_values_tx
17
+ from cartography.graph.job import GraphJob
13
18
  from cartography.intel.aws.permission_relationships import principal_allowed_on_resource
19
+ from cartography.models.aws.iam.access_key import AccountAccessKeySchema
20
+ from cartography.models.aws.iam.account_role import AWSAccountAWSRoleSchema
21
+ from cartography.models.aws.iam.federated_principal import AWSFederatedPrincipalSchema
22
+ from cartography.models.aws.iam.group import AWSGroupSchema
23
+ from cartography.models.aws.iam.inline_policy import AWSInlinePolicySchema
24
+ from cartography.models.aws.iam.managed_policy import AWSManagedPolicySchema
25
+ from cartography.models.aws.iam.policy_statement import AWSPolicyStatementSchema
26
+ from cartography.models.aws.iam.role import AWSRoleSchema
27
+ from cartography.models.aws.iam.root_principal import AWSRootPrincipalSchema
28
+ from cartography.models.aws.iam.service_principal import AWSServicePrincipalSchema
29
+ from cartography.models.aws.iam.sts_assumerole_allow import STSAssumeRoleAllowMatchLink
30
+ from cartography.models.aws.iam.user import AWSUserSchema
14
31
  from cartography.stats import get_stats_client
15
32
  from cartography.util import merge_module_sync_metadata
16
- from cartography.util import run_cleanup_job
17
33
  from cartography.util import timeit
18
34
 
19
35
  logger = logging.getLogger(__name__)
@@ -28,12 +44,32 @@ class PolicyType(enum.Enum):
28
44
  inline = "inline"
29
45
 
30
46
 
47
+ TransformedRoleData = namedtuple(
48
+ "TransformedRoleData",
49
+ [
50
+ "role_data",
51
+ "federated_principals",
52
+ "service_principals",
53
+ "external_aws_accounts",
54
+ ],
55
+ )
56
+
57
+ TransformedPolicyData = namedtuple(
58
+ "TransformedPolicyData",
59
+ [
60
+ "managed_policies",
61
+ "inline_policies",
62
+ "statements_by_policy_id",
63
+ ],
64
+ )
65
+
66
+
31
67
  def get_policy_name_from_arn(arn: str) -> str:
32
68
  return arn.split("/")[-1]
33
69
 
34
70
 
35
71
  @timeit
36
- def get_group_policies(boto3_session: boto3.session.Session, group_name: str) -> Dict:
72
+ def get_group_policies(boto3_session: boto3.Session, group_name: str) -> Dict:
37
73
  client = boto3_session.client("iam")
38
74
  paginator = client.get_paginator("list_group_policies")
39
75
  policy_names: List[Dict] = []
@@ -44,7 +80,7 @@ def get_group_policies(boto3_session: boto3.session.Session, group_name: str) ->
44
80
 
45
81
  @timeit
46
82
  def get_group_policy_info(
47
- boto3_session: boto3.session.Session,
83
+ boto3_session: boto3.Session,
48
84
  group_name: str,
49
85
  policy_name: str,
50
86
  ) -> Any:
@@ -54,7 +90,7 @@ def get_group_policy_info(
54
90
 
55
91
  @timeit
56
92
  def get_group_membership_data(
57
- boto3_session: boto3.session.Session,
93
+ boto3_session: boto3.Session,
58
94
  group_name: str,
59
95
  ) -> Dict:
60
96
  client = boto3_session.client("iam")
@@ -72,7 +108,7 @@ def get_group_membership_data(
72
108
 
73
109
  @timeit
74
110
  def get_group_policy_data(
75
- boto3_session: boto3.session.Session,
111
+ boto3_session: boto3.Session,
76
112
  group_list: List[Dict],
77
113
  ) -> Dict:
78
114
  resource_client = boto3_session.resource("iam")
@@ -90,7 +126,7 @@ def get_group_policy_data(
90
126
 
91
127
  @timeit
92
128
  def get_group_managed_policy_data(
93
- boto3_session: boto3.session.Session,
129
+ boto3_session: boto3.Session,
94
130
  group_list: List[Dict],
95
131
  ) -> Dict:
96
132
  resource_client = boto3_session.resource("iam")
@@ -108,7 +144,7 @@ def get_group_managed_policy_data(
108
144
 
109
145
  @timeit
110
146
  def get_user_policy_data(
111
- boto3_session: boto3.session.Session,
147
+ boto3_session: boto3.Session,
112
148
  user_list: List[Dict],
113
149
  ) -> Dict:
114
150
  resource_client = boto3_session.resource("iam")
@@ -131,7 +167,7 @@ def get_user_policy_data(
131
167
 
132
168
  @timeit
133
169
  def get_user_managed_policy_data(
134
- boto3_session: boto3.session.Session,
170
+ boto3_session: boto3.Session,
135
171
  user_list: List[Dict],
136
172
  ) -> Dict:
137
173
  resource_client = boto3_session.resource("iam")
@@ -154,7 +190,7 @@ def get_user_managed_policy_data(
154
190
 
155
191
  @timeit
156
192
  def get_role_policy_data(
157
- boto3_session: boto3.session.Session,
193
+ boto3_session: boto3.Session,
158
194
  role_list: List[Dict],
159
195
  ) -> Dict:
160
196
  resource_client = boto3_session.resource("iam")
@@ -177,7 +213,7 @@ def get_role_policy_data(
177
213
 
178
214
  @timeit
179
215
  def get_role_managed_policy_data(
180
- boto3_session: boto3.session.Session,
216
+ boto3_session: boto3.Session,
181
217
  role_list: List[Dict],
182
218
  ) -> Dict:
183
219
  resource_client = boto3_session.resource("iam")
@@ -199,7 +235,7 @@ def get_role_managed_policy_data(
199
235
 
200
236
 
201
237
  @timeit
202
- def get_role_tags(boto3_session: boto3.session.Session) -> List[Dict]:
238
+ def get_role_tags(boto3_session: boto3.Session) -> List[Dict]:
203
239
  role_list = get_role_list_data(boto3_session)["Roles"]
204
240
  resource_client = boto3_session.resource("iam")
205
241
  role_tag_data: List[Dict] = []
@@ -221,7 +257,7 @@ def get_role_tags(boto3_session: boto3.session.Session) -> List[Dict]:
221
257
 
222
258
 
223
259
  @timeit
224
- def get_user_list_data(boto3_session: boto3.session.Session) -> Dict:
260
+ def get_user_list_data(boto3_session: boto3.Session) -> Dict:
225
261
  client = boto3_session.client("iam")
226
262
 
227
263
  paginator = client.get_paginator("list_users")
@@ -232,7 +268,7 @@ def get_user_list_data(boto3_session: boto3.session.Session) -> Dict:
232
268
 
233
269
 
234
270
  @timeit
235
- def get_group_list_data(boto3_session: boto3.session.Session) -> Dict:
271
+ def get_group_list_data(boto3_session: boto3.Session) -> Dict:
236
272
  client = boto3_session.client("iam")
237
273
  paginator = client.get_paginator("list_groups")
238
274
  groups: List[Dict] = []
@@ -242,7 +278,7 @@ def get_group_list_data(boto3_session: boto3.session.Session) -> Dict:
242
278
 
243
279
 
244
280
  @timeit
245
- def get_role_list_data(boto3_session: boto3.session.Session) -> Dict:
281
+ def get_role_list_data(boto3_session: boto3.Session) -> Dict:
246
282
  client = boto3_session.client("iam")
247
283
  paginator = client.get_paginator("list_roles")
248
284
  roles: List[Dict] = []
@@ -251,9 +287,33 @@ def get_role_list_data(boto3_session: boto3.session.Session) -> Dict:
251
287
  return {"Roles": roles}
252
288
 
253
289
 
290
+ @timeit
291
+ def get_user_access_keys_data(
292
+ boto3_session: boto3.Session,
293
+ users: list[dict[str, Any]],
294
+ ) -> dict[str, list[dict[str, Any]]]:
295
+ """
296
+ Get access key data for all users.
297
+ Returns a dict mapping user ARN to list of access key data.
298
+ """
299
+ user_access_keys = {}
300
+
301
+ for user in users:
302
+ username = user["name"]
303
+ user_arn = user["arn"]
304
+
305
+ access_keys = get_account_access_key_data(boto3_session, username)
306
+ if access_keys and "AccessKeyMetadata" in access_keys:
307
+ user_access_keys[user_arn] = access_keys["AccessKeyMetadata"]
308
+ else:
309
+ user_access_keys[user_arn] = []
310
+
311
+ return user_access_keys
312
+
313
+
254
314
  @timeit
255
315
  def get_account_access_key_data(
256
- boto3_session: boto3.session.Session,
316
+ boto3_session: boto3.Session,
257
317
  username: str,
258
318
  ) -> Dict:
259
319
  client = boto3_session.client("iam")
@@ -280,223 +340,256 @@ def get_account_access_key_data(
280
340
 
281
341
 
282
342
  @timeit
283
- def load_users(
284
- neo4j_session: neo4j.Session,
285
- users: List[Dict],
286
- current_aws_account_id: str,
287
- aws_update_tag: int,
288
- ) -> None:
289
- ingest_user = """
290
- MERGE (unode:AWSUser{arn: $ARN})
291
- ON CREATE SET unode:AWSPrincipal, unode.userid = $USERID, unode.firstseen = timestamp(),
292
- unode.createdate = $CREATE_DATE
293
- SET unode.name = $USERNAME, unode.path = $PATH, unode.passwordlastused = $PASSWORD_LASTUSED,
294
- unode.lastupdated = $aws_update_tag
295
- WITH unode
296
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
297
- MERGE (aa)-[r:RESOURCE]->(unode)
298
- ON CREATE SET r.firstseen = timestamp()
299
- SET r.lastupdated = $aws_update_tag
343
+ def get_group_memberships(
344
+ boto3_session: boto3.Session, groups: list[dict[str, Any]]
345
+ ) -> dict[str, list[str]]:
300
346
  """
301
- logger.info(f"Loading {len(users)} IAM users.")
302
- for user in users:
303
- neo4j_session.run(
304
- ingest_user,
305
- ARN=user["Arn"],
306
- USERID=user["UserId"],
307
- CREATE_DATE=str(user["CreateDate"]),
308
- USERNAME=user["UserName"],
309
- PATH=user["Path"],
310
- PASSWORD_LASTUSED=str(user.get("PasswordLastUsed", "")),
311
- AWS_ACCOUNT_ID=current_aws_account_id,
312
- aws_update_tag=aws_update_tag,
313
- )
347
+ Get membership data for all groups.
348
+ Returns a dict mapping group ARN to list of user ARNs.
349
+ """
350
+ memberships = {}
351
+ for group in groups:
352
+ try:
353
+ membership_data = get_group_membership_data(
354
+ boto3_session, group["GroupName"]
355
+ )
356
+ if membership_data and "Users" in membership_data:
357
+ memberships[group["Arn"]] = [
358
+ user["Arn"] for user in membership_data["Users"]
359
+ ]
360
+ else:
361
+ memberships[group["Arn"]] = []
362
+ except Exception:
363
+ logger.warning(
364
+ f"Could not get membership data for group {group['GroupName']}",
365
+ exc_info=True,
366
+ )
367
+ memberships[group["Arn"]] = []
368
+
369
+ return memberships
314
370
 
315
371
 
316
372
  @timeit
317
- def load_groups(
373
+ def get_policies_for_principal(
318
374
  neo4j_session: neo4j.Session,
319
- groups: List[Dict],
320
- current_aws_account_id: str,
321
- aws_update_tag: int,
322
- ) -> None:
323
- ingest_group = """
324
- MERGE (gnode:AWSGroup{arn: $ARN})
325
- ON CREATE SET gnode.groupid = $GROUP_ID, gnode.firstseen = timestamp(), gnode.createdate = $CREATE_DATE
326
- SET gnode:AWSPrincipal, gnode.name = $GROUP_NAME, gnode.path = $PATH,gnode.lastupdated = $aws_update_tag
327
- WITH gnode
328
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
329
- MERGE (aa)-[r:RESOURCE]->(gnode)
330
- ON CREATE SET r.firstseen = timestamp()
331
- SET r.lastupdated = $aws_update_tag
375
+ principal_arn: str,
376
+ ) -> Dict:
377
+ get_policy_query = """
378
+ MATCH
379
+ (principal:AWSPrincipal{arn:$Arn})-[:POLICY]->
380
+ (policy:AWSPolicy)-[:STATEMENT]->
381
+ (statements:AWSPolicyStatement)
382
+ RETURN
383
+ DISTINCT policy.id AS policy_id,
384
+ COLLECT(DISTINCT statements) AS statements
332
385
  """
333
- logger.info(f"Loading {len(groups)} IAM groups to the graph.")
334
- for group in groups:
335
- neo4j_session.run(
336
- ingest_group,
337
- ARN=group["Arn"],
338
- GROUP_ID=group["GroupId"],
339
- CREATE_DATE=str(group["CreateDate"]),
340
- GROUP_NAME=group["GroupName"],
341
- PATH=group["Path"],
342
- AWS_ACCOUNT_ID=current_aws_account_id,
343
- aws_update_tag=aws_update_tag,
344
- )
386
+ results = neo4j_session.execute_read(
387
+ read_list_of_dicts_tx,
388
+ get_policy_query,
389
+ Arn=principal_arn,
390
+ )
391
+ policies = {r["policy_id"]: r["statements"] for r in results}
392
+ return policies
345
393
 
346
394
 
347
- def _parse_principal_entries(principal: Dict) -> List[Tuple[Any, Any]]:
348
- """
349
- Returns a list of tuples of the form (principal_type, principal_value)
350
- e.g. [('AWS', 'example-role-name'), ('Service', 'example-service')]
351
- """
352
- principal_entries = []
353
- for principal_type in principal:
354
- principal_values = principal[principal_type]
355
- if not isinstance(principal_values, list):
356
- principal_values = [principal_values]
357
- for principal_value in principal_values:
358
- principal_entries.append((principal_type, principal_value))
359
- return principal_entries
395
+ def transform_users(users: list[dict[str, Any]]) -> list[dict[str, Any]]:
396
+ user_data = []
397
+ for user in users:
398
+ user_record = {
399
+ "arn": user["Arn"],
400
+ "userid": user["UserId"],
401
+ "name": user["UserName"],
402
+ "path": user["Path"],
403
+ "createdate": str(user["CreateDate"]),
404
+ "passwordlastused": str(user.get("PasswordLastUsed", "")),
405
+ }
406
+ user_data.append(user_record)
360
407
 
408
+ return user_data
361
409
 
362
- @timeit
363
- def load_roles(
364
- neo4j_session: neo4j.Session,
365
- roles: List[Dict],
366
- current_aws_account_id: str,
367
- aws_update_tag: int,
368
- ) -> None:
369
- ingest_role = """
370
- MERGE (rnode:AWSPrincipal{arn: $Arn})
371
- ON CREATE SET rnode.firstseen = timestamp()
372
- SET
373
- rnode:AWSRole,
374
- rnode.roleid = $RoleId,
375
- rnode.createdate = $CreateDate,
376
- rnode.name = $RoleName,
377
- rnode.path = $Path,
378
- rnode.lastupdated = $aws_update_tag
379
- WITH rnode
380
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
381
- MERGE (aa)-[r:RESOURCE]->(rnode)
382
- ON CREATE SET r.firstseen = timestamp()
383
- SET r.lastupdated = $aws_update_tag
384
- """
385
410
 
386
- ingest_policy_statement = """
387
- MERGE (spnnode:AWSPrincipal{arn: $SpnArn})
388
- ON CREATE SET spnnode.firstseen = timestamp()
389
- SET spnnode.lastupdated = $aws_update_tag, spnnode.type = $SpnType
390
- WITH spnnode
391
- MATCH (role:AWSRole{arn: $RoleArn})
392
- MERGE (role)-[r:TRUSTS_AWS_PRINCIPAL]->(spnnode)
393
- ON CREATE SET r.firstseen = timestamp()
394
- SET r.lastupdated = $aws_update_tag
411
+ def transform_groups(
412
+ groups: list[dict[str, Any]], group_memberships: dict[str, list[str]]
413
+ ) -> list[dict[str, Any]]:
414
+ group_data = []
415
+ for group in groups:
416
+ group_record = {
417
+ "arn": group["Arn"],
418
+ "groupid": group["GroupId"],
419
+ "name": group["GroupName"],
420
+ "path": group["Path"],
421
+ "createdate": str(group["CreateDate"]),
422
+ "user_arns": group_memberships.get(group["Arn"], []),
423
+ }
424
+ group_data.append(group_record)
425
+
426
+ return group_data
427
+
428
+
429
+ def transform_access_keys(
430
+ user_access_keys: dict[str, list[dict[str, Any]]],
431
+ ) -> list[dict[str, Any]]:
432
+ access_key_data = []
433
+ for user_arn, access_keys in user_access_keys.items():
434
+ for access_key in access_keys:
435
+ if access_key.get("AccessKeyId"):
436
+ access_key_record = {
437
+ "accesskeyid": access_key["AccessKeyId"],
438
+ "createdate": str(access_key["CreateDate"]),
439
+ "status": access_key["Status"],
440
+ "lastuseddate": str(access_key.get("LastUsedDate", "")),
441
+ "lastusedservice": access_key.get("LastUsedService", ""),
442
+ "lastusedregion": access_key.get("LastUsedRegion", ""),
443
+ "user_arn": user_arn, # For the sub-resource relationship
444
+ }
445
+ access_key_data.append(access_key_record)
446
+
447
+ return access_key_data
448
+
449
+
450
+ def transform_role_trust_policies(
451
+ roles: list[dict[str, Any]], current_aws_account_id: str
452
+ ) -> TransformedRoleData:
395
453
  """
396
-
397
- # Note - why we don't set inscope or foreign attribute on the account
398
- #
399
- # we are agnostic here if this is the AWSAccount is part of the sync scope or
400
- # a foreign AWS account that contains a trusted principal. The account could also be inscope
401
- # but not sync yet.
402
- # - The inscope attribute - set when the account is being sync.
403
- # - The foreign attribute - the attribute assignment logic is in aws_foreign_accounts.json analysis job
404
- # - Why seperate statement is needed - the arn may point to service level principals ex - ec2.amazonaws.com
405
- ingest_spnmap_statement = """
406
- MERGE (aa:AWSAccount{id: $SpnAccountId})
407
- ON CREATE SET aa.firstseen = timestamp()
408
- SET aa.lastupdated = $aws_update_tag
409
- WITH aa
410
- MATCH (spnnode:AWSPrincipal{arn: $SpnArn})
411
- WITH spnnode, aa
412
- MERGE (aa)-[r:RESOURCE]->(spnnode)
413
- ON CREATE SET r.firstseen = timestamp()
454
+ Processes AWS role assumption policy documents in the list_roles response.
455
+ Returns a TransformedRoleData object containing the role data, federated principals, service principals, and external AWS accounts.
414
456
  """
457
+ role_data: list[dict[str, Any]] = []
458
+ federated_principals: list[dict[str, Any]] = []
459
+ service_principals: list[dict[str, Any]] = []
460
+ external_aws_accounts: list[dict[str, Any]] = []
415
461
 
416
- # TODO support conditions
417
- logger.info(f"Loading {len(roles)} IAM roles to the graph.")
418
462
  for role in roles:
419
- neo4j_session.run(
420
- ingest_role,
421
- Arn=role["Arn"],
422
- RoleId=role["RoleId"],
423
- CreateDate=str(role["CreateDate"]),
424
- RoleName=role["RoleName"],
425
- Path=role["Path"],
426
- AWS_ACCOUNT_ID=current_aws_account_id,
427
- aws_update_tag=aws_update_tag,
428
- )
463
+ role_arn = role["Arn"]
429
464
 
465
+ # List of principals of type "AWS" that this role trusts
466
+ trusted_aws_principals = set()
467
+ # Process each statement in the assume role policy document
468
+ # TODO support conditions
430
469
  for statement in role["AssumeRolePolicyDocument"]["Statement"]:
470
+
431
471
  principal_entries = _parse_principal_entries(statement["Principal"])
432
- for principal_type, principal_value in principal_entries:
433
- neo4j_session.run(
434
- ingest_policy_statement,
435
- SpnArn=principal_value,
436
- SpnType=principal_type,
437
- RoleArn=role["Arn"],
438
- aws_update_tag=aws_update_tag,
439
- )
440
- spn_arn = get_account_from_arn(principal_value)
441
- if spn_arn:
442
- neo4j_session.run(
443
- ingest_spnmap_statement,
444
- SpnArn=principal_value,
445
- SpnAccountId=get_account_from_arn(principal_value),
446
- aws_update_tag=aws_update_tag,
472
+ for principal_type, principal_arn in principal_entries:
473
+ if principal_type == "Federated":
474
+ # Add this to list of federated nodes to create
475
+ account_id = get_account_from_arn(principal_arn)
476
+ federated_principals.append(
477
+ {
478
+ "arn": principal_arn,
479
+ "type": "Federated",
480
+ "other_account_id": (
481
+ account_id
482
+ if account_id != current_aws_account_id
483
+ else None
484
+ ),
485
+ "role_arn": role_arn,
486
+ }
487
+ )
488
+ trusted_aws_principals.add(principal_arn)
489
+ elif principal_type == "Service":
490
+ # Add to the list of service nodes to create
491
+ service_principals.append(
492
+ {
493
+ "arn": principal_arn,
494
+ "type": "Service",
495
+ }
447
496
  )
497
+ # Service principals are global so there is no account id.
498
+ trusted_aws_principals.add(principal_arn)
499
+ elif principal_type == "AWS":
500
+ if "root" in principal_arn:
501
+ # The current principal trusts a root principal.
502
+
503
+ # First check if the root principal is in a different account than the current one.
504
+ # Add what we know about that account to the graph.
505
+ account_id = get_account_from_arn(principal_arn)
506
+ if account_id != current_aws_account_id:
507
+ external_aws_accounts.append({"id": account_id})
508
+ trusted_aws_principals.add(principal_arn)
509
+ else:
510
+ # This should not happen but who knows.
511
+ logger.warning(f"Unknown principal type: {principal_type}")
512
+
513
+ role_record = {
514
+ "arn": role["Arn"],
515
+ "roleid": role["RoleId"],
516
+ "name": role["RoleName"],
517
+ "path": role["Path"],
518
+ "createdate": str(role["CreateDate"]),
519
+ "trusted_aws_principals": list(trusted_aws_principals),
520
+ "account_id": get_account_from_arn(role["Arn"]),
521
+ }
522
+ role_data.append(role_record)
523
+
524
+ return TransformedRoleData(
525
+ role_data=role_data,
526
+ federated_principals=federated_principals,
527
+ service_principals=service_principals,
528
+ external_aws_accounts=external_aws_accounts,
529
+ )
448
530
 
449
531
 
450
532
  @timeit
451
- def load_group_memberships(
533
+ def load_users(
452
534
  neo4j_session: neo4j.Session,
453
- group_memberships: Dict,
535
+ users: List[Dict],
536
+ current_aws_account_id: str,
454
537
  aws_update_tag: int,
455
538
  ) -> None:
456
- ingest_membership = """
457
- MATCH (group:AWSGroup{arn: $GroupArn})
458
- WITH group
459
- MATCH (user:AWSUser{arn: $PrincipalArn})
460
- MERGE (user)-[r:MEMBER_AWS_GROUP]->(group)
461
- ON CREATE SET r.firstseen = timestamp()
462
- SET r.lastupdated = $aws_update_tag
463
- WITH user, group
464
- MATCH (group)-[:POLICY]->(policy:AWSPolicy)
465
- MERGE (user)-[r2:POLICY]->(policy)
466
- SET r2.lastupdated = $aws_update_tag
467
- """
539
+ load(
540
+ neo4j_session,
541
+ AWSUserSchema(),
542
+ users,
543
+ lastupdated=aws_update_tag,
544
+ AWS_ID=current_aws_account_id,
545
+ )
468
546
 
469
- for group_arn, membership_data in group_memberships.items():
470
- for info in membership_data.get("Users", []):
471
- principal_arn = info["Arn"]
472
- neo4j_session.run(
473
- ingest_membership,
474
- GroupArn=group_arn,
475
- PrincipalArn=principal_arn,
476
- aws_update_tag=aws_update_tag,
477
- )
547
+
548
+ @timeit
549
+ def load_groups(
550
+ neo4j_session: neo4j.Session,
551
+ groups: List[Dict],
552
+ current_aws_account_id: str,
553
+ aws_update_tag: int,
554
+ ) -> None:
555
+ load(
556
+ neo4j_session,
557
+ AWSGroupSchema(),
558
+ groups,
559
+ lastupdated=aws_update_tag,
560
+ AWS_ID=current_aws_account_id,
561
+ )
478
562
 
479
563
 
480
564
  @timeit
481
- def get_policies_for_principal(
565
+ def load_access_keys(
482
566
  neo4j_session: neo4j.Session,
483
- principal_arn: str,
484
- ) -> Dict:
485
- get_policy_query = """
486
- MATCH
487
- (principal:AWSPrincipal{arn:$Arn})-[:POLICY]->
488
- (policy:AWSPolicy)-[:STATEMENT]->
489
- (statements:AWSPolicyStatement)
490
- RETURN
491
- DISTINCT policy.id AS policy_id,
492
- COLLECT(DISTINCT statements) AS statements
493
- """
494
- results = neo4j_session.run(
495
- get_policy_query,
496
- Arn=principal_arn,
567
+ access_keys: List[Dict],
568
+ aws_update_tag: int,
569
+ current_aws_account_id: str,
570
+ ) -> None:
571
+ load(
572
+ neo4j_session,
573
+ AccountAccessKeySchema(),
574
+ access_keys,
575
+ lastupdated=aws_update_tag,
576
+ AWS_ID=current_aws_account_id,
497
577
  )
498
- policies = {r["policy_id"]: parse_statement_node(r["statements"]) for r in results}
499
- return policies
578
+
579
+
580
+ def _parse_principal_entries(principal: Dict) -> List[Tuple[Any, Any]]:
581
+ """
582
+ Returns a list of tuples of the form (principal_type, principal_value)
583
+ e.g. [('AWS', 'example-role-name'), ('Service', 'example-service')]
584
+ """
585
+ principal_entries = []
586
+ for principal_type in principal:
587
+ principal_values = principal[principal_type]
588
+ if not isinstance(principal_values, list):
589
+ principal_values = [principal_values]
590
+ for principal_value in principal_values:
591
+ principal_entries.append((principal_type, principal_value))
592
+ return principal_entries
500
593
 
501
594
 
502
595
  @timeit
@@ -514,81 +607,46 @@ def sync_assumerole_relationships(
514
607
  )
515
608
  query_potential_matches = """
516
609
  MATCH (:AWSAccount{id:$AccountId})-[:RESOURCE]->(target:AWSRole)-[:TRUSTS_AWS_PRINCIPAL]->(source:AWSPrincipal)
517
- WHERE NOT source.arn ENDS WITH 'root'
518
- AND NOT source.type = 'Service'
519
- AND NOT source.type = 'Federated'
520
- RETURN target.arn AS target_arn,
521
- source.arn AS source_arn
522
- """
523
-
524
- ingest_policies_assume_role = """
525
- MATCH (source:AWSPrincipal{arn: $SourceArn})
526
- WITH source
527
- MATCH (role:AWSRole{arn: $TargetArn})
528
- WITH role, source
529
- MERGE (source)-[r:STS_ASSUMEROLE_ALLOW]->(role)
530
- ON CREATE SET r.firstseen = timestamp()
531
- SET r.lastupdated = $aws_update_tag
610
+ WHERE NOT source:AWSRootPrincipal
611
+ AND NOT source:AWSServicePrincipal
612
+ AND NOT source:AWSFederatedPrincipal
613
+ RETURN target.arn AS target_arn, source.arn AS source_arn
532
614
  """
533
-
534
- results = neo4j_session.run(
615
+ results = neo4j_session.execute_read(
616
+ read_list_of_dicts_tx,
535
617
  query_potential_matches,
536
618
  AccountId=current_aws_account_id,
537
619
  )
538
- potential_matches = [(r["source_arn"], r["target_arn"]) for r in results]
539
- for source_arn, target_arn in potential_matches:
620
+
621
+ # Filter potential matches to only those where the source principal has sts:AssumeRole permission
622
+ valid_matches = []
623
+ for result in results:
624
+ source_arn = result["source_arn"]
625
+ target_arn = result["target_arn"]
540
626
  policies = get_policies_for_principal(neo4j_session, source_arn)
541
627
  if principal_allowed_on_resource(policies, target_arn, ["sts:AssumeRole"]):
542
- neo4j_session.run(
543
- ingest_policies_assume_role,
544
- SourceArn=source_arn,
545
- TargetArn=target_arn,
546
- aws_update_tag=aws_update_tag,
628
+ valid_matches.append(
629
+ {
630
+ "source_arn": source_arn,
631
+ "target_arn": target_arn,
632
+ }
547
633
  )
548
- run_cleanup_job(
549
- "aws_import_roles_policy_cleanup.json",
634
+
635
+ load_matchlinks(
550
636
  neo4j_session,
551
- common_job_parameters,
637
+ STSAssumeRoleAllowMatchLink(),
638
+ valid_matches,
639
+ lastupdated=aws_update_tag,
640
+ _sub_resource_label="AWSAccount",
641
+ _sub_resource_id=current_aws_account_id,
552
642
  )
553
643
 
554
-
555
- @timeit
556
- def load_user_access_keys(
557
- neo4j_session: neo4j.Session,
558
- user_access_keys: Dict,
559
- aws_update_tag: int,
560
- ) -> None:
561
- # TODO change the node label to reflect that this is a user access key, not an account access key
562
- ingest_account_key = """
563
- MATCH (user:AWSUser{arn: $UserARN})
564
- WITH user
565
- MERGE (key:AccountAccessKey{accesskeyid: $AccessKeyId})
566
- ON CREATE SET key.firstseen = timestamp(), key.createdate = $CreateDate
567
- SET key.status = $Status,
568
- key.lastupdated = $aws_update_tag,
569
- key.lastuseddate = $LastUsedDate,
570
- key.lastusedservice = $LastUsedService,
571
- key.lastusedregion = $LastUsedRegion
572
- WITH user,key
573
- MERGE (user)-[r:AWS_ACCESS_KEY]->(key)
574
- ON CREATE SET r.firstseen = timestamp()
575
- SET r.lastupdated = $aws_update_tag
576
- """
577
-
578
- for arn, access_keys in user_access_keys.items():
579
- for key in access_keys["AccessKeyMetadata"]:
580
- if key.get("AccessKeyId"):
581
- neo4j_session.run(
582
- ingest_account_key,
583
- UserARN=arn,
584
- AccessKeyId=key["AccessKeyId"],
585
- CreateDate=str(key["CreateDate"]),
586
- Status=key["Status"],
587
- LastUsedDate=key["LastUsedDate"],
588
- LastUsedService=key["LastUsedService"],
589
- LastUsedRegion=key["LastUsedRegion"],
590
- aws_update_tag=aws_update_tag,
591
- )
644
+ GraphJob.from_matchlink(
645
+ STSAssumeRoleAllowMatchLink(),
646
+ sub_resource_label="AWSAccount",
647
+ sub_resource_id=current_aws_account_id,
648
+ update_tag=aws_update_tag,
649
+ ).run(neo4j_session)
592
650
 
593
651
 
594
652
  def ensure_list(obj: Any) -> List[Any]:
@@ -597,304 +655,460 @@ def ensure_list(obj: Any) -> List[Any]:
597
655
  return obj
598
656
 
599
657
 
600
- def _transform_policy_statements(statements: Any, policy_id: str) -> List[Dict]:
658
+ def _transform_policy_statements(
659
+ statements: Any, policy_id: str
660
+ ) -> list[dict[str, Any]]:
661
+ result: List[Dict[str, Any]] = []
601
662
  count = 1
663
+
602
664
  if not isinstance(statements, list):
603
665
  statements = [statements]
666
+
604
667
  for stmt in statements:
668
+ # Determine statement ID
605
669
  if "Sid" in stmt and stmt["Sid"]:
606
670
  statement_id = stmt["Sid"]
607
671
  else:
608
672
  statement_id = count
609
673
  count += 1
610
674
 
611
- stmt["id"] = f"{policy_id}/statement/{statement_id}"
675
+ transformed_stmt = {
676
+ "id": f"{policy_id}/statement/{statement_id}",
677
+ "policy_id": policy_id, # For the relationship to AWSPolicy
678
+ "Effect": stmt.get("Effect"),
679
+ "Sid": stmt.get("Sid"),
680
+ }
681
+
682
+ # Handle list fields
612
683
  if "Resource" in stmt:
613
- stmt["Resource"] = ensure_list(stmt["Resource"])
684
+ transformed_stmt["Resource"] = ensure_list(stmt["Resource"])
614
685
  if "Action" in stmt:
615
- stmt["Action"] = ensure_list(stmt["Action"])
686
+ transformed_stmt["Action"] = ensure_list(stmt["Action"])
616
687
  if "NotAction" in stmt:
617
- stmt["NotAction"] = ensure_list(stmt["NotAction"])
688
+ transformed_stmt["NotAction"] = ensure_list(stmt["NotAction"])
618
689
  if "NotResource" in stmt:
619
- stmt["NotResource"] = ensure_list(stmt["NotResource"])
690
+ transformed_stmt["NotResource"] = ensure_list(stmt["NotResource"])
620
691
  if "Condition" in stmt:
621
- stmt["Condition"] = json.dumps(ensure_list(stmt["Condition"]))
622
- return statements
692
+ transformed_stmt["Condition"] = json.dumps(ensure_list(stmt["Condition"]))
693
+
694
+ result.append(transformed_stmt)
695
+
696
+ return result
623
697
 
624
698
 
625
- def transform_policy_data(policy_map: Dict, policy_type: str) -> None:
699
+ def transform_policy_data(
700
+ policy_map: dict[str, dict[str, Any]], policy_type: str
701
+ ) -> TransformedPolicyData:
702
+ """
703
+ Processes AWS IAM policy documents. Returns a TransformedPolicyData object containing the managed policies, inline policies, and statements by policy id -- all ready to be loaded to the graph.
704
+ """
705
+ # First pass: collect all policies and their principals
706
+ policy_to_principals: dict[str, set[str]] = {}
707
+ policy_to_statements: dict[str, list[dict[str, Any]]] = {}
708
+ policy_to_name: dict[str, str] = {}
709
+
626
710
  for principal_arn, policy_statement_map in policy_map.items():
627
- logger.debug(
628
- f"Transforming IAM {policy_type} policies for principal {principal_arn}",
629
- )
630
711
  for policy_key, statements in policy_statement_map.items():
631
712
  policy_id = (
632
- transform_policy_id(
633
- principal_arn,
634
- policy_type,
635
- policy_key,
636
- )
713
+ transform_policy_id(principal_arn, policy_type, policy_key)
637
714
  if policy_type == PolicyType.inline.value
638
715
  else policy_key
639
716
  )
640
- policy_statement_map[policy_key] = _transform_policy_statements(
717
+ policy_name = (
718
+ policy_key
719
+ if policy_type == PolicyType.inline.value
720
+ else get_policy_name_from_arn(policy_key)
721
+ )
722
+ # Map policy id to the principal arns that have it
723
+ if policy_id not in policy_to_principals:
724
+ policy_to_principals[policy_id] = set()
725
+ policy_to_principals[policy_id].add(principal_arn)
726
+
727
+ # Map policy id to policy name
728
+ policy_to_name[policy_id] = policy_name
729
+
730
+ # Transform and store statements
731
+ transformed_statements = _transform_policy_statements(
641
732
  statements,
642
733
  policy_id,
643
734
  )
735
+ policy_to_statements[policy_id] = transformed_statements
736
+
737
+ # Second pass: create consolidated policy data
738
+ managed_policy_data = []
739
+ inline_policy_data = []
740
+
741
+ for policy_id, principal_arns in policy_to_principals.items():
742
+ policy_name = policy_to_name[policy_id]
743
+
744
+ policy_data = {
745
+ "id": policy_id,
746
+ "name": policy_name,
747
+ "type": policy_type,
748
+ # AWS inline policies don't have arns
749
+ "arn": policy_id if policy_type == PolicyType.managed.value else None,
750
+ "principal_arns": list(principal_arns),
751
+ }
752
+
753
+ if policy_type == PolicyType.inline.value:
754
+ inline_policy_data.append(policy_data)
755
+ elif policy_type == PolicyType.managed.value:
756
+ managed_policy_data.append(policy_data)
757
+ else:
758
+ # This really should never happen so just explicitly having a `pass` here.
759
+ pass
760
+
761
+ return TransformedPolicyData(
762
+ managed_policies=managed_policy_data,
763
+ inline_policies=inline_policy_data,
764
+ statements_by_policy_id=policy_to_statements,
765
+ )
644
766
 
645
767
 
646
768
  def transform_policy_id(principal_arn: str, policy_type: str, name: str) -> str:
647
769
  return f"{principal_arn}/{policy_type}_policy/{name}"
648
770
 
649
771
 
650
- def _load_policy_tx(
651
- tx: neo4j.Transaction,
652
- policy_id: str,
653
- policy_name: str,
654
- policy_type: str,
655
- principal_arn: str,
772
+ def _load_policy(
773
+ neo4j_session: neo4j.Session,
774
+ managed_policy_data: list[dict[str, Any]],
775
+ inline_policy_data: list[dict[str, Any]],
776
+ account_id: str,
656
777
  aws_update_tag: int,
657
778
  ) -> None:
658
- ingest_policy = """
659
- MERGE (policy:AWSPolicy{id: $PolicyId})
660
- ON CREATE SET
661
- policy.firstseen = timestamp(),
662
- policy.type = $PolicyType,
663
- policy.name = $PolicyName
664
- SET policy.lastupdated = $aws_update_tag
665
- WITH policy
666
- MATCH (principal:AWSPrincipal{arn: $PrincipalArn})
667
- MERGE (policy) <-[r:POLICY]-(principal)
668
- SET r.lastupdated = $aws_update_tag
669
- """
670
- tx.run(
671
- ingest_policy,
672
- PolicyId=policy_id,
673
- PolicyName=policy_name,
674
- PolicyType=policy_type,
675
- PrincipalArn=principal_arn,
676
- aws_update_tag=aws_update_tag,
779
+ load(
780
+ neo4j_session,
781
+ AWSManagedPolicySchema(),
782
+ managed_policy_data,
783
+ lastupdated=aws_update_tag,
784
+ )
785
+ load(
786
+ neo4j_session,
787
+ AWSInlinePolicySchema(),
788
+ inline_policy_data,
789
+ lastupdated=aws_update_tag,
790
+ AWS_ID=account_id,
677
791
  )
678
792
 
679
793
 
680
794
  @timeit
681
- def load_policy(
795
+ def load_policy_statements(
682
796
  neo4j_session: neo4j.Session,
683
- policy_id: str,
684
- policy_name: str,
685
- policy_type: str,
686
- principal_arn: str,
797
+ statements: list[dict[str, Any]],
687
798
  aws_update_tag: int,
688
799
  ) -> None:
689
- neo4j_session.write_transaction(
690
- _load_policy_tx,
691
- policy_id,
692
- policy_name,
693
- policy_type,
694
- principal_arn,
695
- aws_update_tag,
800
+ load(
801
+ neo4j_session,
802
+ AWSPolicyStatementSchema(),
803
+ statements,
804
+ lastupdated=aws_update_tag,
805
+ POLICY_ID=statements[0]["policy_id"],
696
806
  )
697
807
 
698
808
 
699
809
  @timeit
700
- def load_policy_statements(
810
+ def _load_policy_statements(
701
811
  neo4j_session: neo4j.Session,
702
- policy_id: str,
703
- policy_name: str,
704
- statements: Any,
812
+ policy_statements: dict[str, list[dict[str, Any]]],
705
813
  aws_update_tag: int,
706
814
  ) -> None:
707
- ingest_policy_statement = """
708
- MATCH (policy:AWSPolicy{id: $PolicyId})
709
- WITH policy
710
- UNWIND $Statements as statement_data
711
- MERGE (statement:AWSPolicyStatement{id: statement_data.id})
712
- SET
713
- statement.effect = statement_data.Effect,
714
- statement.action = statement_data.Action,
715
- statement.notaction = statement_data.NotAction,
716
- statement.resource = statement_data.Resource,
717
- statement.notresource = statement_data.NotResource,
718
- statement.condition = statement_data.Condition,
719
- statement.sid = statement_data.Sid,
720
- statement.lastupdated = $aws_update_tag
721
- MERGE (policy)-[r:STATEMENT]->(statement)
722
- ON CREATE SET r.firstseen = timestamp()
723
- SET r.lastupdated = $aws_update_tag
724
- """
725
- neo4j_session.run(
726
- ingest_policy_statement,
727
- PolicyId=policy_id,
728
- PolicyName=policy_name,
729
- Statements=statements,
730
- aws_update_tag=aws_update_tag,
731
- ).consume()
815
+ for policy_id, statements in policy_statements.items():
816
+ load(
817
+ neo4j_session,
818
+ AWSPolicyStatementSchema(),
819
+ statements,
820
+ lastupdated=aws_update_tag,
821
+ POLICY_ID=policy_id,
822
+ )
732
823
 
733
824
 
734
825
  @timeit
735
826
  def load_policy_data(
736
827
  neo4j_session: neo4j.Session,
737
- principal_policy_map: Dict[str, Dict[str, Any]],
738
- policy_type: str,
828
+ transformed_policy_data: TransformedPolicyData,
739
829
  aws_update_tag: int,
830
+ current_aws_account_id: str,
740
831
  ) -> None:
741
- for principal_arn, policy_statement_map in principal_policy_map.items():
742
- logger.debug(f"Loading policies for principal {principal_arn}")
743
- for policy_key, statements in policy_statement_map.items():
744
- policy_name = (
745
- policy_key
746
- if policy_type == PolicyType.inline.value
747
- else get_policy_name_from_arn(policy_key)
748
- )
749
- policy_id = (
750
- transform_policy_id(
751
- principal_arn,
752
- policy_type,
753
- policy_key,
754
- )
755
- if policy_type == PolicyType.inline.value
756
- else policy_key
757
- )
758
- load_policy(
759
- neo4j_session,
760
- policy_id,
761
- policy_name,
762
- policy_type,
763
- principal_arn,
764
- aws_update_tag,
765
- )
766
- load_policy_statements(
767
- neo4j_session,
768
- policy_id,
769
- policy_name,
770
- statements,
771
- aws_update_tag,
772
- )
832
+ _load_policy(
833
+ neo4j_session,
834
+ transformed_policy_data.managed_policies,
835
+ transformed_policy_data.inline_policies,
836
+ current_aws_account_id,
837
+ aws_update_tag,
838
+ )
839
+
840
+ _load_policy_statements(
841
+ neo4j_session,
842
+ transformed_policy_data.statements_by_policy_id,
843
+ aws_update_tag,
844
+ )
773
845
 
774
846
 
775
847
  @timeit
776
848
  def sync_users(
777
849
  neo4j_session: neo4j.Session,
778
- boto3_session: boto3.session.Session,
850
+ boto3_session: boto3.Session,
779
851
  current_aws_account_id: str,
780
852
  aws_update_tag: int,
781
853
  common_job_parameters: Dict,
782
854
  ) -> None:
783
855
  logger.info("Syncing IAM users for account '%s'.", current_aws_account_id)
784
856
  data = get_user_list_data(boto3_session)
785
- load_users(neo4j_session, data["Users"], current_aws_account_id, aws_update_tag)
857
+ user_data = transform_users(data["Users"])
858
+ load_users(neo4j_session, user_data, current_aws_account_id, aws_update_tag)
786
859
 
787
- sync_user_inline_policies(boto3_session, data, neo4j_session, aws_update_tag)
860
+ sync_user_inline_policies(
861
+ boto3_session, data, neo4j_session, aws_update_tag, current_aws_account_id
862
+ )
788
863
 
789
- sync_user_managed_policies(boto3_session, data, neo4j_session, aws_update_tag)
864
+ sync_user_managed_policies(
865
+ boto3_session, data, neo4j_session, aws_update_tag, current_aws_account_id
866
+ )
790
867
 
791
- run_cleanup_job(
792
- "aws_import_users_cleanup.json",
793
- neo4j_session,
794
- common_job_parameters,
868
+
869
+ @timeit
870
+ def sync_user_access_keys(
871
+ neo4j_session: neo4j.Session,
872
+ boto3_session: boto3.Session,
873
+ current_aws_account_id: str,
874
+ aws_update_tag: int,
875
+ common_job_parameters: Dict,
876
+ ) -> None:
877
+ logger.info(
878
+ "Syncing IAM user access keys for account '%s'.", current_aws_account_id
879
+ )
880
+
881
+ # Query the graph for users instead of making another AWS API call
882
+ query = (
883
+ "MATCH (user:AWSUser)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) "
884
+ "RETURN user.name as name, user.arn as arn"
885
+ )
886
+ users = neo4j_session.execute_read(
887
+ read_list_of_dicts_tx,
888
+ query,
889
+ AWS_ID=current_aws_account_id,
890
+ )
891
+
892
+ user_access_keys = get_user_access_keys_data(boto3_session, users)
893
+ access_key_data = transform_access_keys(user_access_keys)
894
+ load_access_keys(
895
+ neo4j_session, access_key_data, aws_update_tag, current_aws_account_id
896
+ )
897
+ GraphJob.from_node_schema(AccountAccessKeySchema(), common_job_parameters).run(
898
+ neo4j_session
795
899
  )
796
900
 
797
901
 
798
902
  @timeit
799
903
  def sync_user_managed_policies(
800
- boto3_session: boto3.session.Session,
904
+ boto3_session: boto3.Session,
801
905
  data: Dict,
802
906
  neo4j_session: neo4j.Session,
803
907
  aws_update_tag: int,
908
+ current_aws_account_id: str,
804
909
  ) -> None:
805
910
  managed_policy_data = get_user_managed_policy_data(boto3_session, data["Users"])
806
- transform_policy_data(managed_policy_data, PolicyType.managed.value)
911
+ transformed_policy_data = transform_policy_data(
912
+ managed_policy_data, PolicyType.managed.value
913
+ )
807
914
  load_policy_data(
808
915
  neo4j_session,
809
- managed_policy_data,
810
- PolicyType.managed.value,
916
+ transformed_policy_data,
811
917
  aws_update_tag,
918
+ current_aws_account_id,
812
919
  )
813
920
 
814
921
 
815
922
  @timeit
816
923
  def sync_user_inline_policies(
817
- boto3_session: boto3.session.Session,
924
+ boto3_session: boto3.Session,
818
925
  data: Dict,
819
926
  neo4j_session: neo4j.Session,
820
927
  aws_update_tag: int,
928
+ current_aws_account_id: str,
821
929
  ) -> None:
822
930
  policy_data = get_user_policy_data(boto3_session, data["Users"])
823
- transform_policy_data(policy_data, PolicyType.inline.value)
931
+ transformed_policy_data = transform_policy_data(
932
+ policy_data, PolicyType.inline.value
933
+ )
824
934
  load_policy_data(
825
935
  neo4j_session,
826
- policy_data,
827
- PolicyType.inline.value,
936
+ transformed_policy_data,
828
937
  aws_update_tag,
938
+ current_aws_account_id,
829
939
  )
830
940
 
831
941
 
832
942
  @timeit
833
943
  def sync_groups(
834
944
  neo4j_session: neo4j.Session,
835
- boto3_session: boto3.session.Session,
945
+ boto3_session: boto3.Session,
836
946
  current_aws_account_id: str,
837
947
  aws_update_tag: int,
838
948
  common_job_parameters: Dict,
839
949
  ) -> None:
840
950
  logger.info("Syncing IAM groups for account '%s'.", current_aws_account_id)
841
951
  data = get_group_list_data(boto3_session)
842
- load_groups(neo4j_session, data["Groups"], current_aws_account_id, aws_update_tag)
843
-
844
- sync_groups_inline_policies(boto3_session, data, neo4j_session, aws_update_tag)
952
+ group_memberships = get_group_memberships(boto3_session, data["Groups"])
953
+ group_data = transform_groups(data["Groups"], group_memberships)
954
+ load_groups(neo4j_session, group_data, current_aws_account_id, aws_update_tag)
845
955
 
846
- sync_group_managed_policies(boto3_session, data, neo4j_session, aws_update_tag)
956
+ sync_groups_inline_policies(
957
+ boto3_session, data, neo4j_session, aws_update_tag, current_aws_account_id
958
+ )
847
959
 
848
- run_cleanup_job(
849
- "aws_import_groups_cleanup.json",
850
- neo4j_session,
851
- common_job_parameters,
960
+ sync_group_managed_policies(
961
+ boto3_session, data, neo4j_session, aws_update_tag, current_aws_account_id
852
962
  )
853
963
 
854
964
 
855
965
  def sync_group_managed_policies(
856
- boto3_session: boto3.session.Session,
966
+ boto3_session: boto3.Session,
857
967
  data: Dict,
858
968
  neo4j_session: neo4j.Session,
859
969
  aws_update_tag: int,
970
+ current_aws_account_id: str,
860
971
  ) -> None:
861
972
  managed_policy_data = get_group_managed_policy_data(boto3_session, data["Groups"])
862
- transform_policy_data(managed_policy_data, PolicyType.managed.value)
973
+ transformed_policy_data = transform_policy_data(
974
+ managed_policy_data, PolicyType.managed.value
975
+ )
863
976
  load_policy_data(
864
977
  neo4j_session,
865
- managed_policy_data,
866
- PolicyType.managed.value,
978
+ transformed_policy_data,
867
979
  aws_update_tag,
980
+ current_aws_account_id,
868
981
  )
869
982
 
870
983
 
871
984
  def sync_groups_inline_policies(
872
- boto3_session: boto3.session.Session,
985
+ boto3_session: boto3.Session,
873
986
  data: Dict,
874
987
  neo4j_session: neo4j.Session,
875
988
  aws_update_tag: int,
989
+ current_aws_account_id: str,
876
990
  ) -> None:
877
991
  policy_data = get_group_policy_data(boto3_session, data["Groups"])
878
- transform_policy_data(policy_data, PolicyType.inline.value)
992
+ transformed_policy_data = transform_policy_data(
993
+ policy_data, PolicyType.inline.value
994
+ )
879
995
  load_policy_data(
880
996
  neo4j_session,
881
- policy_data,
882
- PolicyType.inline.value,
997
+ transformed_policy_data,
998
+ aws_update_tag,
999
+ current_aws_account_id,
1000
+ )
1001
+
1002
+
1003
+ def load_external_aws_accounts(
1004
+ neo4j_session: neo4j.Session,
1005
+ external_aws_accounts: list[dict[str, Any]],
1006
+ aws_update_tag: int,
1007
+ ) -> None:
1008
+ load(
1009
+ neo4j_session,
1010
+ AWSAccountAWSRoleSchema(),
1011
+ external_aws_accounts,
1012
+ lastupdated=aws_update_tag,
1013
+ )
1014
+ # Ensure that the root principal exists for each external account.
1015
+ for account in external_aws_accounts:
1016
+ sync_root_principal(
1017
+ neo4j_session,
1018
+ account["id"],
1019
+ aws_update_tag,
1020
+ )
1021
+
1022
+
1023
+ @timeit
1024
+ def load_service_principals(
1025
+ neo4j_session: neo4j.Session,
1026
+ service_principals: list[dict[str, Any]],
1027
+ aws_update_tag: int,
1028
+ ) -> None:
1029
+ load(
1030
+ neo4j_session,
1031
+ AWSServicePrincipalSchema(),
1032
+ service_principals,
1033
+ lastupdated=aws_update_tag,
1034
+ )
1035
+
1036
+
1037
+ @timeit
1038
+ def load_role_data(
1039
+ neo4j_session: neo4j.Session,
1040
+ role_list: list[dict[str, Any]],
1041
+ current_aws_account_id: str,
1042
+ aws_update_tag: int,
1043
+ ) -> None:
1044
+ # Note that the account_id is set in the transform_roles function instead of from the `AWS_ID` kwarg like in other modules
1045
+ # because this can create root principals from other accounts based on data from the assume role policy document.
1046
+ load(
1047
+ neo4j_session,
1048
+ AWSRoleSchema(),
1049
+ role_list,
1050
+ lastupdated=aws_update_tag,
1051
+ AWS_ID=current_aws_account_id,
1052
+ )
1053
+
1054
+
1055
+ @timeit
1056
+ def load_federated_principals(
1057
+ neo4j_session: neo4j.Session,
1058
+ federated_principals: list[dict[str, Any]],
1059
+ current_aws_account_id: str,
1060
+ aws_update_tag: int,
1061
+ ) -> None:
1062
+ load(
1063
+ neo4j_session,
1064
+ AWSFederatedPrincipalSchema(),
1065
+ federated_principals,
1066
+ lastupdated=aws_update_tag,
1067
+ AWS_ID=current_aws_account_id,
1068
+ )
1069
+
1070
+
1071
+ @timeit
1072
+ def sync_role_assumptions(
1073
+ neo4j_session: neo4j.Session,
1074
+ data: dict[str, Any],
1075
+ current_aws_account_id: str,
1076
+ aws_update_tag: int,
1077
+ ) -> None:
1078
+ transformed = transform_role_trust_policies(data["Roles"], current_aws_account_id)
1079
+
1080
+ # Order matters here.
1081
+ # External accounts come first because they need to be created before the roles that trust them.
1082
+ load_external_aws_accounts(
1083
+ neo4j_session, transformed.external_aws_accounts, aws_update_tag
1084
+ )
1085
+ # Service principals e.g. arn = "ec2.amazonaws.com" come next because they're global
1086
+ load_service_principals(
1087
+ neo4j_session, transformed.service_principals, aws_update_tag
1088
+ )
1089
+ load_federated_principals(
1090
+ neo4j_session,
1091
+ transformed.federated_principals,
1092
+ current_aws_account_id,
883
1093
  aws_update_tag,
884
1094
  )
1095
+ load_role_data(
1096
+ neo4j_session, transformed.role_data, current_aws_account_id, aws_update_tag
1097
+ )
885
1098
 
886
1099
 
887
1100
  @timeit
888
1101
  def sync_roles(
889
1102
  neo4j_session: neo4j.Session,
890
- boto3_session: boto3.session.Session,
1103
+ boto3_session: boto3.Session,
891
1104
  current_aws_account_id: str,
892
1105
  aws_update_tag: int,
893
1106
  common_job_parameters: Dict,
894
1107
  ) -> None:
895
1108
  logger.info("Syncing IAM roles for account '%s'.", current_aws_account_id)
896
1109
  data = get_role_list_data(boto3_session)
897
- load_roles(neo4j_session, data["Roles"], current_aws_account_id, aws_update_tag)
1110
+
1111
+ sync_role_assumptions(neo4j_session, data, current_aws_account_id, aws_update_tag)
898
1112
 
899
1113
  sync_role_inline_policies(
900
1114
  current_aws_account_id,
@@ -912,16 +1126,10 @@ def sync_roles(
912
1126
  aws_update_tag,
913
1127
  )
914
1128
 
915
- run_cleanup_job(
916
- "aws_import_roles_cleanup.json",
917
- neo4j_session,
918
- common_job_parameters,
919
- )
920
-
921
1129
 
922
1130
  def sync_role_managed_policies(
923
1131
  current_aws_account_id: str,
924
- boto3_session: boto3.session.Session,
1132
+ boto3_session: boto3.Session,
925
1133
  data: Dict,
926
1134
  neo4j_session: neo4j.Session,
927
1135
  aws_update_tag: int,
@@ -931,18 +1139,20 @@ def sync_role_managed_policies(
931
1139
  current_aws_account_id,
932
1140
  )
933
1141
  managed_policy_data = get_role_managed_policy_data(boto3_session, data["Roles"])
934
- transform_policy_data(managed_policy_data, PolicyType.managed.value)
1142
+ transformed_policy_data = transform_policy_data(
1143
+ managed_policy_data, PolicyType.managed.value
1144
+ )
935
1145
  load_policy_data(
936
1146
  neo4j_session,
937
- managed_policy_data,
938
- PolicyType.managed.value,
1147
+ transformed_policy_data,
939
1148
  aws_update_tag,
1149
+ current_aws_account_id,
940
1150
  )
941
1151
 
942
1152
 
943
1153
  def sync_role_inline_policies(
944
1154
  current_aws_account_id: str,
945
- boto3_session: boto3.session.Session,
1155
+ boto3_session: boto3.Session,
946
1156
  data: Dict,
947
1157
  neo4j_session: neo4j.Session,
948
1158
  aws_update_tag: int,
@@ -952,76 +1162,121 @@ def sync_role_inline_policies(
952
1162
  current_aws_account_id,
953
1163
  )
954
1164
  inline_policy_data = get_role_policy_data(boto3_session, data["Roles"])
955
- transform_policy_data(inline_policy_data, PolicyType.inline.value)
1165
+ transformed_policy_data = transform_policy_data(
1166
+ inline_policy_data, PolicyType.inline.value
1167
+ )
956
1168
  load_policy_data(
957
1169
  neo4j_session,
958
- inline_policy_data,
959
- PolicyType.inline.value,
1170
+ transformed_policy_data,
960
1171
  aws_update_tag,
1172
+ current_aws_account_id,
961
1173
  )
962
1174
 
963
1175
 
1176
+ def _get_policies_in_current_account(
1177
+ neo4j_session: neo4j.Session, current_aws_account_id: str
1178
+ ) -> list[str]:
1179
+ query = """
1180
+ MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(p:AWSPolicy)
1181
+ RETURN p.id
1182
+ """
1183
+ return [
1184
+ str(policy_id)
1185
+ for policy_id in neo4j_session.execute_read(
1186
+ read_list_of_values_tx,
1187
+ query,
1188
+ AWS_ID=current_aws_account_id,
1189
+ )
1190
+ ]
1191
+
1192
+
1193
+ def _get_principals_with_pols_in_current_account(
1194
+ neo4j_session: neo4j.Session, current_aws_account_id: str
1195
+ ) -> list[str]:
1196
+ query = """
1197
+ MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(p:AWSPrincipal)
1198
+ WHERE (p)-[:POLICY]->(:AWSPolicy)
1199
+ RETURN p.id
1200
+ """
1201
+ return [
1202
+ str(principal_id)
1203
+ for principal_id in neo4j_session.execute_read(
1204
+ read_list_of_values_tx,
1205
+ query,
1206
+ AWS_ID=current_aws_account_id,
1207
+ )
1208
+ ]
1209
+
1210
+
964
1211
  @timeit
965
- def sync_group_memberships(
966
- neo4j_session: neo4j.Session,
967
- boto3_session: boto3.session.Session,
968
- current_aws_account_id: str,
969
- aws_update_tag: int,
970
- common_job_parameters: Dict,
971
- ) -> None:
972
- logger.info(
973
- "Syncing IAM group membership for account '%s'.",
974
- current_aws_account_id,
1212
+ def cleanup_iam(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
1213
+ # List all policies in the current account
1214
+ policy_ids = _get_policies_in_current_account(
1215
+ neo4j_session, common_job_parameters["AWS_ID"]
975
1216
  )
976
- query = (
977
- "MATCH (group:AWSGroup)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ACCOUNT_ID}) "
978
- "return group.name as name, group.arn as arn;"
979
- )
980
- groups = neo4j_session.run(query, AWS_ACCOUNT_ID=current_aws_account_id)
981
- groups_membership = {
982
- group["arn"]: get_group_membership_data(boto3_session, group["name"])
983
- for group in groups
984
- }
985
- load_group_memberships(neo4j_session, groups_membership, aws_update_tag)
986
- run_cleanup_job(
987
- "aws_import_groups_membership_cleanup.json",
988
- neo4j_session,
989
- common_job_parameters,
1217
+
1218
+ # for each policy id, run the cleanup job for the policy statements, passing the policy id as a kwarg.
1219
+ for policy_id in policy_ids:
1220
+ GraphJob.from_node_schema(
1221
+ AWSPolicyStatementSchema(),
1222
+ {**common_job_parameters, "POLICY_ID": policy_id},
1223
+ ).run(
1224
+ neo4j_session,
1225
+ )
1226
+
1227
+ # Next, clean up the policies
1228
+ # Note that managed policies don't have a sub resource relationship. This means that we will only clean up
1229
+ # stale relationships and not stale AWSManagedPolicy nodes. This is because AWSManagedPolicy nodes are global
1230
+ # to AWS and it is possible for them to be shared across accounts, so if we cleaned up an AWSManagedPolicy node
1231
+ # for one account, it would be erroneously deleted for all accounts. Instead, we just clean up the relationships.
1232
+ GraphJob.from_node_schema(AWSManagedPolicySchema(), common_job_parameters).run(
1233
+ neo4j_session
990
1234
  )
991
1235
 
1236
+ # Inline policies are simpler in that they are scoped to a single principal and therefore attached to that
1237
+ # principal's account. This means that this operation will clean up stale AWSInlinePolicy nodes.
1238
+ GraphJob.from_node_schema(AWSInlinePolicySchema(), common_job_parameters).run(
1239
+ neo4j_session
1240
+ )
992
1241
 
993
- @timeit
994
- def sync_user_access_keys(
995
- neo4j_session: neo4j.Session,
996
- boto3_session: boto3.session.Session,
997
- current_aws_account_id: str,
998
- aws_update_tag: int,
999
- common_job_parameters: Dict,
1000
- ) -> None:
1001
- logger.info(
1002
- "Syncing IAM user access keys for account '%s'.",
1003
- current_aws_account_id,
1242
+ # Clean up roles before federated and service principals
1243
+ GraphJob.from_node_schema(AWSRoleSchema(), common_job_parameters).run(neo4j_session)
1244
+ GraphJob.from_node_schema(AWSFederatedPrincipalSchema(), common_job_parameters).run(
1245
+ neo4j_session
1004
1246
  )
1005
- query = (
1006
- "MATCH (user:AWSUser)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ACCOUNT_ID}) "
1007
- "RETURN user.name as name, user.arn as arn"
1247
+ GraphJob.from_node_schema(AWSServicePrincipalSchema(), common_job_parameters).run(
1248
+ neo4j_session
1249
+ )
1250
+ GraphJob.from_node_schema(AWSUserSchema(), common_job_parameters).run(neo4j_session)
1251
+ GraphJob.from_node_schema(AWSGroupSchema(), common_job_parameters).run(
1252
+ neo4j_session
1008
1253
  )
1009
- for user in neo4j_session.run(query, AWS_ACCOUNT_ID=current_aws_account_id):
1010
- access_keys = get_account_access_key_data(boto3_session, user["name"])
1011
- if access_keys:
1012
- account_access_keys = {user["arn"]: access_keys}
1013
- load_user_access_keys(neo4j_session, account_access_keys, aws_update_tag)
1014
- run_cleanup_job(
1015
- "aws_import_account_access_key_cleanup.json",
1254
+
1255
+
1256
+ def sync_root_principal(
1257
+ neo4j_session: neo4j.Session, current_aws_account_id: str, aws_update_tag: int
1258
+ ) -> None:
1259
+ """
1260
+ In the current account, create a node for the AWS root principal "arn:aws:iam::<account_id>:root".
1261
+
1262
+ If a role X trusts the root principal in an account A, then any other role Y in A can assume X.
1263
+
1264
+ Note that this is _not_ the same as the AWS root user. The root principal doesn't show up in any
1265
+ APIs except for assumerole trust policies.
1266
+ """
1267
+ load(
1016
1268
  neo4j_session,
1017
- common_job_parameters,
1269
+ AWSRootPrincipalSchema(),
1270
+ [{"arn": f"arn:aws:iam::{current_aws_account_id}:root"}],
1271
+ lastupdated=aws_update_tag,
1272
+ AWS_ID=current_aws_account_id,
1018
1273
  )
1019
1274
 
1020
1275
 
1021
1276
  @timeit
1022
1277
  def sync(
1023
1278
  neo4j_session: neo4j.Session,
1024
- boto3_session: boto3.session.Session,
1279
+ boto3_session: boto3.Session,
1025
1280
  regions: List[str],
1026
1281
  current_aws_account_id: str,
1027
1282
  update_tag: int,
@@ -1030,28 +1285,26 @@ def sync(
1030
1285
  logger.info("Syncing IAM for account '%s'.", current_aws_account_id)
1031
1286
  # This module only syncs IAM information that is in use.
1032
1287
  # As such only policies that are attached to a user, role or group are synced
1033
- sync_users(
1288
+ sync_root_principal(
1034
1289
  neo4j_session,
1035
- boto3_session,
1036
1290
  current_aws_account_id,
1037
1291
  update_tag,
1038
- common_job_parameters,
1039
1292
  )
1040
- sync_groups(
1293
+ sync_users(
1041
1294
  neo4j_session,
1042
1295
  boto3_session,
1043
1296
  current_aws_account_id,
1044
1297
  update_tag,
1045
1298
  common_job_parameters,
1046
1299
  )
1047
- sync_roles(
1300
+ sync_groups(
1048
1301
  neo4j_session,
1049
1302
  boto3_session,
1050
1303
  current_aws_account_id,
1051
1304
  update_tag,
1052
1305
  common_job_parameters,
1053
1306
  )
1054
- sync_group_memberships(
1307
+ sync_roles(
1055
1308
  neo4j_session,
1056
1309
  boto3_session,
1057
1310
  current_aws_account_id,
@@ -1071,11 +1324,7 @@ def sync(
1071
1324
  update_tag,
1072
1325
  common_job_parameters,
1073
1326
  )
1074
- run_cleanup_job(
1075
- "aws_import_principals_cleanup.json",
1076
- neo4j_session,
1077
- common_job_parameters,
1078
- )
1327
+ cleanup_iam(neo4j_session, common_job_parameters)
1079
1328
  merge_module_sync_metadata(
1080
1329
  neo4j_session,
1081
1330
  group_type="AWSAccount",