cartography 0.110.0rc1__py3-none-any.whl → 0.111.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of cartography might be problematic; see the registry's advisory for details.

Files changed (87)
  1. cartography/_version.py +16 -3
  2. cartography/cli.py +46 -8
  3. cartography/config.py +16 -9
  4. cartography/data/indexes.cypher +0 -2
  5. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  6. cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
  7. cartography/graph/querybuilder.py +70 -0
  8. cartography/intel/aws/apigateway.py +113 -4
  9. cartography/intel/aws/cognito.py +201 -0
  10. cartography/intel/aws/ec2/vpc.py +140 -124
  11. cartography/intel/aws/ecs.py +7 -1
  12. cartography/intel/aws/eventbridge.py +73 -0
  13. cartography/intel/aws/glue.py +64 -0
  14. cartography/intel/aws/kms.py +13 -1
  15. cartography/intel/aws/rds.py +105 -0
  16. cartography/intel/aws/resources.py +2 -0
  17. cartography/intel/aws/route53.py +3 -1
  18. cartography/intel/aws/s3.py +104 -0
  19. cartography/intel/entra/__init__.py +41 -43
  20. cartography/intel/entra/applications.py +2 -1
  21. cartography/intel/entra/ou.py +1 -1
  22. cartography/intel/github/__init__.py +21 -25
  23. cartography/intel/github/repos.py +32 -48
  24. cartography/intel/github/util.py +12 -0
  25. cartography/intel/keycloak/__init__.py +153 -0
  26. cartography/intel/keycloak/authenticationexecutions.py +322 -0
  27. cartography/intel/keycloak/authenticationflows.py +77 -0
  28. cartography/intel/keycloak/clients.py +187 -0
  29. cartography/intel/keycloak/groups.py +126 -0
  30. cartography/intel/keycloak/identityproviders.py +94 -0
  31. cartography/intel/keycloak/organizations.py +163 -0
  32. cartography/intel/keycloak/realms.py +61 -0
  33. cartography/intel/keycloak/roles.py +202 -0
  34. cartography/intel/keycloak/scopes.py +73 -0
  35. cartography/intel/keycloak/users.py +70 -0
  36. cartography/intel/keycloak/util.py +47 -0
  37. cartography/intel/kubernetes/__init__.py +4 -0
  38. cartography/intel/kubernetes/rbac.py +464 -0
  39. cartography/intel/kubernetes/util.py +17 -0
  40. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  41. cartography/models/aws/cognito/__init__.py +0 -0
  42. cartography/models/aws/cognito/identity_pool.py +70 -0
  43. cartography/models/aws/cognito/user_pool.py +47 -0
  44. cartography/models/aws/ec2/security_groups.py +1 -1
  45. cartography/models/aws/ec2/vpc.py +46 -0
  46. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  47. cartography/models/aws/ecs/services.py +17 -0
  48. cartography/models/aws/ecs/tasks.py +1 -0
  49. cartography/models/aws/eventbridge/target.py +71 -0
  50. cartography/models/aws/glue/job.py +69 -0
  51. cartography/models/aws/rds/event_subscription.py +146 -0
  52. cartography/models/aws/route53/dnsrecord.py +21 -0
  53. cartography/models/github/dependencies.py +1 -2
  54. cartography/models/keycloak/__init__.py +0 -0
  55. cartography/models/keycloak/authenticationexecution.py +160 -0
  56. cartography/models/keycloak/authenticationflow.py +54 -0
  57. cartography/models/keycloak/client.py +177 -0
  58. cartography/models/keycloak/group.py +101 -0
  59. cartography/models/keycloak/identityprovider.py +89 -0
  60. cartography/models/keycloak/organization.py +116 -0
  61. cartography/models/keycloak/organizationdomain.py +73 -0
  62. cartography/models/keycloak/realm.py +173 -0
  63. cartography/models/keycloak/role.py +126 -0
  64. cartography/models/keycloak/scope.py +73 -0
  65. cartography/models/keycloak/user.py +51 -0
  66. cartography/models/kubernetes/clusterrolebindings.py +98 -0
  67. cartography/models/kubernetes/clusterroles.py +52 -0
  68. cartography/models/kubernetes/rolebindings.py +119 -0
  69. cartography/models/kubernetes/roles.py +76 -0
  70. cartography/models/kubernetes/serviceaccounts.py +77 -0
  71. cartography/models/tailscale/device.py +1 -0
  72. cartography/sync.py +2 -0
  73. cartography/util.py +8 -0
  74. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/METADATA +4 -3
  75. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/RECORD +85 -46
  76. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  77. cartography/intel/entra/resources.py +0 -20
  78. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  79. /cartography/models/aws/{__init__.py → apigateway/__init__.py} +0 -0
  80. /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
  81. /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
  82. /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
  83. /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
  84. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/WHEEL +0 -0
  85. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/entry_points.txt +0 -0
  86. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/licenses/LICENSE +0 -0
  87. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/top_level.txt +0 -0

cartography/intel/aws/glue.py

@@ -10,6 +10,7 @@ from cartography.client.core.tx import load
 from cartography.graph.job import GraphJob
 from cartography.intel.aws.ec2.util import get_botocore_config
 from cartography.models.aws.glue.connection import GlueConnectionSchema
+from cartography.models.aws.glue.job import GlueJobSchema
 from cartography.util import aws_handle_regions
 from cartography.util import timeit

@@ -32,6 +33,37 @@ def get_glue_connections(
     return connections


+@timeit
+@aws_handle_regions
+def get_glue_jobs(boto3_session: boto3.Session, region: str) -> List[Dict[str, Any]]:
+    client = boto3_session.client(
+        "glue", region_name=region, config=get_botocore_config()
+    )
+    paginator = client.get_paginator("get_jobs")
+    jobs = []
+    for page in paginator.paginate():
+        jobs.extend(page.get("Jobs", []))
+    return jobs
+
+
+def transform_glue_job(jobs: List[Dict[str, Any]], region: str) -> List[Dict[str, Any]]:
+    """
+    Transform Glue job data for ingestion
+    """
+    transformed_jobs = []
+    for job in jobs:
+        transformed_job = {
+            "Name": job["Name"],
+            "ProfileName": job.get("ProfileName"),
+            "JobMode": job.get("JobMode"),
+            "Connections": job.get("Connections", {}).get("Connections"),
+            "Region": region,
+            "Description": job.get("Description"),
+        }
+        transformed_jobs.append(transformed_job)
+    return transformed_jobs
+
+
 def transform_glue_connections(
     connections: List[Dict[str, Any]], region: str
 ) -> List[Dict[str, Any]]:
@@ -79,6 +111,27 @@ def load_glue_connections(
     )


+@timeit
+def load_glue_jobs(
+    neo4j_session: neo4j.Session,
+    data: List[Dict[str, Any]],
+    region: str,
+    current_aws_account_id: str,
+    aws_update_tag: int,
+) -> None:
+    logger.info(
+        f"Loading Glue {len(data)} jobs for region '{region}' into graph.",
+    )
+    load(
+        neo4j_session,
+        GlueJobSchema(),
+        data,
+        lastupdated=aws_update_tag,
+        Region=region,
+        AWS_ID=current_aws_account_id,
+    )
+
+
 @timeit
 def cleanup(
     neo4j_session: neo4j.Session,
@@ -88,6 +141,7 @@ def cleanup(
     GraphJob.from_node_schema(GlueConnectionSchema(), common_job_parameters).run(
         neo4j_session
     )
+    GraphJob.from_node_schema(GlueJobSchema(), common_job_parameters).run(neo4j_session)


 @timeit
@@ -114,4 +168,14 @@ def sync(
         update_tag,
     )

+    jobs = get_glue_jobs(boto3_session, region)
+    transformed_jobs = transform_glue_job(jobs, region)
+    load_glue_jobs(
+        neo4j_session,
+        transformed_jobs,
+        region,
+        current_aws_account_id,
+        update_tag,
+    )
+
     cleanup(neo4j_session, common_job_parameters)
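
The new Glue job coverage follows cartography's usual get → transform → load → cleanup pattern. As a quick orientation aid (not part of the release), the sketch below shows what transform_glue_job produces for a hypothetical get_jobs page; the sample payload, job name, and region are invented for illustration.

from cartography.intel.aws.glue import transform_glue_job

# Hypothetical GetJobs items; only the fields the transform reads are populated.
sample_jobs = [
    {
        "Name": "nightly-etl",
        "JobMode": "SCRIPT",
        "Description": "Example ETL job",
        "Connections": {"Connections": ["example-redshift-connection"]},
    },
]

print(transform_glue_job(sample_jobs, region="us-east-1"))
# [{'Name': 'nightly-etl', 'ProfileName': None, 'JobMode': 'SCRIPT',
#   'Connections': ['example-redshift-connection'], 'Region': 'us-east-1',
#   'Description': 'Example ETL job'}]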

cartography/intel/aws/kms.py

@@ -76,8 +76,8 @@ def get_policy(key: Dict, client: botocore.client.BaseClient) -> Any:
     try:
         policy = client.get_key_policy(KeyId=key["KeyId"], PolicyName="default")
     except ClientError as e:
-        policy = None
         if e.response["Error"]["Code"] == "AccessDeniedException":
+            policy = None
             logger.warning(
                 f"kms:get_key_policy on key id {key['KeyId']} failed with AccessDeniedException; continuing sync.",
                 exc_info=True,
@@ -187,6 +187,18 @@ def transform_kms_key_policies(
     policy_data = {}

     for key_id, policy, *_ in policy_alias_grants_data:
+        # Handle keys with null policy (access denied)
+        if policy is None:
+            logger.info(
+                f"Skipping KMS key {key_id} policy due to AccessDenied; policy analysis properties will be null"
+            )
+            policy_data[key_id] = {
+                "kms_key": key_id,
+                "anonymous_access": None,
+                "anonymous_actions": None,
+            }
+            continue
+
         parsed_policy = parse_policy(key_id, policy)
         policy_data[key_id] = parsed_policy

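
The net effect of the two KMS changes: a key whose policy cannot be read (AccessDeniedException) now flows through the transform with explicit null analysis properties instead of aborting the sync. A standalone sketch of that degradation path, using hypothetical data and a stand-in for parse_policy:

# Hypothetical (key_id, policy, aliases, grants) tuples like the ones the KMS
# sync assembles; the second key's policy fetch was denied, so policy is None.
def summarize_policies(policy_alias_grants_data):
    policy_data = {}
    for key_id, policy, *_ in policy_alias_grants_data:
        if policy is None:  # get_key_policy was denied for this key
            policy_data[key_id] = {
                "kms_key": key_id,
                "anonymous_access": None,
                "anonymous_actions": None,
            }
            continue
        policy_data[key_id] = {"kms_key": key_id}  # real code calls parse_policy here
    return policy_data

print(summarize_policies([("key-readable", {"Policy": "{}"}, [], []),
                          ("key-denied", None, [], [])]))
# 'key-denied' keeps null anonymous_access / anonymous_actions rather than raising.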

cartography/intel/aws/rds.py

@@ -9,6 +9,7 @@ import neo4j
 from cartography.client.core.tx import load
 from cartography.graph.job import GraphJob
 from cartography.models.aws.rds.cluster import RDSClusterSchema
+from cartography.models.aws.rds.event_subscription import RDSEventSubscriptionSchema
 from cartography.models.aws.rds.instance import RDSInstanceSchema
 from cartography.models.aws.rds.snapshot import RDSSnapshotSchema
 from cartography.models.aws.rds.subnet_group import DBSubnetGroupSchema
@@ -136,6 +137,38 @@ def load_rds_snapshots(
     )


+@timeit
+@aws_handle_regions
+def get_rds_event_subscription_data(
+    boto3_session: boto3.session.Session,
+    region: str,
+) -> List[Dict[str, Any]]:
+    client = boto3_session.client("rds", region_name=region)
+    paginator = client.get_paginator("describe_event_subscriptions")
+    subscriptions = []
+    for page in paginator.paginate():
+        subscriptions.extend(page["EventSubscriptionsList"])
+    return subscriptions
+
+
+@timeit
+def load_rds_event_subscriptions(
+    neo4j_session: neo4j.Session,
+    data: List[Dict],
+    region: str,
+    current_aws_account_id: str,
+    aws_update_tag: int,
+) -> None:
+    load(
+        neo4j_session,
+        RDSEventSubscriptionSchema(),
+        data,
+        lastupdated=aws_update_tag,
+        Region=region,
+        AWS_ID=current_aws_account_id,
+    )
+
+
 def _validate_rds_endpoint(rds: Dict) -> Dict:
     """
     Get Endpoint from RDS data structure. Log to debug if an Endpoint field does not exist.
@@ -292,6 +325,28 @@ def transform_rds_instances(
     return instances


+def transform_rds_event_subscriptions(data: List[Dict]) -> List[Dict]:
+    subscriptions = []
+    for subscription in data:
+        transformed = {
+            "CustSubscriptionId": subscription.get("CustSubscriptionId"),
+            "EventSubscriptionArn": subscription.get("EventSubscriptionArn"),
+            "CustomerAwsId": subscription.get("CustomerAwsId"),
+            "SnsTopicArn": subscription.get("SnsTopicArn"),
+            "SourceType": subscription.get("SourceType"),
+            "Status": subscription.get("Status"),
+            "Enabled": subscription.get("Enabled"),
+            "SubscriptionCreationTime": dict_value_to_str(
+                subscription, "SubscriptionCreationTime"
+            ),
+            "event_categories": subscription.get("EventCategoriesList") or None,
+            "source_ids": subscription.get("SourceIdsList") or None,
+            "lastupdated": None,  # This will be set by the loader
+        }
+        subscriptions.append(transformed)
+    return subscriptions
+
+
 def transform_rds_subnet_groups(
     data: List[Dict], region: str, current_aws_account_id: str
 ) -> List[Dict]:
@@ -412,6 +467,20 @@ def cleanup_rds_snapshots(
     )


+@timeit
+def cleanup_rds_event_subscriptions(
+    neo4j_session: neo4j.Session,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Remove RDS event subscriptions that weren't updated in this sync run
+    """
+    logger.debug("Running RDS event subscriptions cleanup job")
+    GraphJob.from_node_schema(RDSEventSubscriptionSchema(), common_job_parameters).run(
+        neo4j_session
+    )
+
+
 @timeit
 def sync_rds_clusters(
     neo4j_session: neo4j.Session,
@@ -498,6 +567,32 @@ def sync_rds_snapshots(
     cleanup_rds_snapshots(neo4j_session, common_job_parameters)


+@timeit
+def sync_rds_event_subscriptions(
+    neo4j_session: neo4j.Session,
+    boto3_session: boto3.session.Session,
+    regions: List[str],
+    current_aws_account_id: str,
+    update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Grab RDS event subscription data from AWS, ingest to neo4j, and run the cleanup job.
+    """
+    for region in regions:
+        logger.info(
+            "Syncing RDS event subscriptions for region '%s' in account '%s'.",
+            region,
+            current_aws_account_id,
+        )
+        data = get_rds_event_subscription_data(boto3_session, region)
+        transformed = transform_rds_event_subscriptions(data)
+        load_rds_event_subscriptions(
+            neo4j_session, transformed, region, current_aws_account_id, update_tag
+        )
+    cleanup_rds_event_subscriptions(neo4j_session, common_job_parameters)
+
+
 @timeit
 def sync(
     neo4j_session: neo4j.Session,
@@ -531,6 +626,16 @@ def sync(
         update_tag,
         common_job_parameters,
     )
+
+    sync_rds_event_subscriptions(
+        neo4j_session,
+        boto3_session,
+        regions,
+        current_aws_account_id,
+        update_tag,
+        common_job_parameters,
+    )
+
     merge_module_sync_metadata(
         neo4j_session,
         group_type="AWSAccount",
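
Because transform_rds_event_subscriptions is a pure function, it is easy to exercise on its own. A minimal sketch (not part of the release) with a hypothetical DescribeEventSubscriptions item; the ARNs, account ID, and subscription name are invented:

from cartography.intel.aws.rds import transform_rds_event_subscriptions

raw = [
    {
        "CustSubscriptionId": "example-subscription",
        "EventSubscriptionArn": "arn:aws:rds:us-east-1:000000000000:es:example-subscription",
        "CustomerAwsId": "000000000000",
        "SnsTopicArn": "arn:aws:sns:us-east-1:000000000000:example-topic",
        "SourceType": "db-instance",
        "Status": "active",
        "Enabled": True,
        "SubscriptionCreationTime": "2025-01-01 00:00:00.000",
        "EventCategoriesList": ["failure", "maintenance"],
        "SourceIdsList": [],
    },
]

transformed = transform_rds_event_subscriptions(raw)
# transformed[0]["event_categories"] == ["failure", "maintenance"]
# transformed[0]["source_ids"] is None: the empty SourceIdsList collapses to None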

cartography/intel/aws/resources.py

@@ -9,6 +9,7 @@ from . import cloudtrail
 from . import cloudtrail_management_events
 from . import cloudwatch
 from . import codebuild
+from . import cognito
 from . import config
 from . import dynamodb
 from . import ecr
@@ -116,6 +117,7 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
     "efs": efs.sync,
     "guardduty": guardduty.sync,
     "codebuild": codebuild.sync,
+    "cognito": cognito.sync,
     "eventbridge": eventbridge.sync,
     "glue": glue.sync,
 }

cartography/intel/aws/route53.py

@@ -398,7 +398,9 @@ def link_sub_zones(
     MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(z:AWSDNSZone)
     <-[:MEMBER_OF_DNS_ZONE]-(record:DNSRecord{type:"NS"})
     -[:DNS_POINTS_TO]->(ns:NameServer)<-[:NAMESERVER]-(z2:AWSDNSZone)
-    WHERE record.name=z2.name AND NOT z=z2
+    WHERE record.name = z2.name AND
+        z2.name ENDS WITH '.' + z.name AND
+        NOT z = z2
     RETURN z.id as zone_id, z2.id as subzone_id
     """
    zone_to_subzone = neo4j_session.read_transaction(
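
The added ENDS WITH clause tightens sub-zone linking: a zone is only treated as a sub-zone when its name is an actual DNS child of the parent, not merely the target of a matching NS record. A standalone sketch of the equivalent string check (hypothetical zone names, ignoring Route 53's trailing-dot convention):

# Mirrors the new Cypher condition: z2 is a sub-zone of z only if
# z2.name ends with "." + z.name and the two zones are distinct.
def is_subzone(parent: str, child: str) -> bool:
    return child != parent and child.endswith("." + parent)

print(is_subzone("example.com", "dev.example.com"))   # True
print(is_subzone("example.com", "notexample.com"))    # False: shares a suffix, not a child zone
print(is_subzone("example.com", "example.com"))       # False: a zone is not its own sub-zone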

cartography/intel/aws/s3.py

@@ -71,6 +71,7 @@ def get_s3_bucket_details(
         Dict[str, Any],
         Dict[str, Any],
         Dict[str, Any],
+        Dict[str, Any],
     ]

     async def _get_bucket_detail(bucket: Dict[str, Any]) -> BucketDetail:
@@ -88,6 +89,7 @@ def get_s3_bucket_details(
             versioning,
             public_access_block,
             bucket_ownership_controls,
+            bucket_logging,
         ) = await asyncio.gather(
             to_asynchronous(get_acl, bucket, client),
             to_asynchronous(get_policy, bucket, client),
@@ -95,6 +97,7 @@ def get_s3_bucket_details(
             to_asynchronous(get_versioning, bucket, client),
             to_asynchronous(get_public_access_block, bucket, client),
             to_asynchronous(get_bucket_ownership_controls, bucket, client),
+            to_asynchronous(get_bucket_logging, bucket, client),
         )
         return (
             bucket["Name"],
@@ -104,6 +107,7 @@ def get_s3_bucket_details(
             versioning,
             public_access_block,
             bucket_ownership_controls,
+            bucket_logging,
         )

     bucket_details = to_synchronous(
@@ -241,6 +245,29 @@ def get_bucket_ownership_controls(
     return bucket_ownership_controls


+@timeit
+@aws_handle_regions
+def get_bucket_logging(
+    bucket: Dict, client: botocore.client.BaseClient
+) -> Optional[Dict]:
+    """
+    Gets the S3 bucket logging status configuration.
+    """
+    bucket_logging = None
+    try:
+        bucket_logging = client.get_bucket_logging(Bucket=bucket["Name"])
+    except ClientError as e:
+        if _is_common_exception(e, bucket):
+            pass
+        else:
+            raise
+    except EndpointConnectionError:
+        logger.warning(
+            f"Failed to retrieve S3 bucket logging status for {bucket['Name']} - Could not connect to the endpoint URL",
+        )
+    return bucket_logging
+
+
 @timeit
 def _is_common_exception(e: Exception, bucket: Dict) -> bool:
     error_msg = "Failed to retrieve S3 bucket detail"
@@ -319,6 +346,7 @@ def _load_s3_acls(
         "aws_s3acl_analysis.json",
         neo4j_session,
         {"AWS_ID": aws_account_id},
+        package="cartography.data.jobs.scoped_analysis",
     )


@@ -479,6 +507,30 @@ def _load_bucket_ownership_controls(
     )


+@timeit
+def _load_bucket_logging(
+    neo4j_session: neo4j.Session,
+    bucket_logging_configs: List[Dict],
+    update_tag: int,
+) -> None:
+    """
+    Ingest S3 bucket logging status configuration into neo4j.
+    """
+    # Load basic logging status
+    ingest_bucket_logging = """
+    UNWIND $bucket_logging_configs AS bucket_logging
+    MATCH (bucket:S3Bucket{name: bucket_logging.bucket})
+    SET bucket.logging_enabled = bucket_logging.logging_enabled,
+        bucket.logging_target_bucket = bucket_logging.target_bucket,
+        bucket.lastupdated = $update_tag
+    """
+    neo4j_session.run(
+        ingest_bucket_logging,
+        bucket_logging_configs=bucket_logging_configs,
+        update_tag=update_tag,
+    )
+
+
 def _set_default_values(neo4j_session: neo4j.Session, aws_account_id: str) -> None:
     set_defaults = """
     MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(s:S3Bucket) where s.anonymous_actions IS NULL
@@ -516,6 +568,7 @@ def load_s3_details(
     versioning_configs: List[Dict] = []
     public_access_block_configs: List[Dict] = []
     bucket_ownership_controls_configs: List[Dict] = []
+    bucket_logging_configs: List[Dict] = []
     for (
         bucket,
         acl,
@@ -524,6 +577,7 @@ def load_s3_details(
         versioning,
         public_access_block,
         bucket_ownership_controls,
+        bucket_logging,
     ) in s3_details_iter:
         parsed_acls = parse_acl(acl, bucket, aws_account_id)
         if parsed_acls is not None:
@@ -551,6 +605,9 @@ def load_s3_details(
         )
         if parsed_bucket_ownership_controls is not None:
             bucket_ownership_controls_configs.append(parsed_bucket_ownership_controls)
+        parsed_bucket_logging = parse_bucket_logging(bucket, bucket_logging)
+        if parsed_bucket_logging is not None:
+            bucket_logging_configs.append(parsed_bucket_logging)

     # cleanup existing policy properties set on S3 Buckets
     run_cleanup_job(
@@ -569,6 +626,7 @@ def load_s3_details(
     _load_bucket_ownership_controls(
         neo4j_session, bucket_ownership_controls_configs, update_tag
     )
+    _load_bucket_logging(neo4j_session, bucket_logging_configs, update_tag)
     _set_default_values(neo4j_session, aws_account_id)


@@ -851,6 +909,52 @@ def parse_bucket_ownership_controls(
     }


+def parse_bucket_logging(bucket: str, bucket_logging: Optional[Dict]) -> Optional[Dict]:
+    """Parses the S3 bucket logging status configuration and returns a dict of the relevant data"""
+    # Logging status object JSON looks like:
+    # {
+    #     'LoggingEnabled': {
+    #         'TargetBucket': 'string',
+    #         'TargetGrants': [
+    #             {
+    #                 'Grantee': {
+    #                     'DisplayName': 'string',
+    #                     'EmailAddress': 'string',
+    #                     'ID': 'string',
+    #                     'Type': 'CanonicalUser'|'AmazonCustomerByEmail'|'Group',
+    #                     'URI': 'string'
+    #                 },
+    #                 'Permission': 'FULL_CONTROL'|'READ'|'WRITE'
+    #             },
+    #         ],
+    #         'TargetPrefix': 'string',
+    #         'TargetObjectKeyFormat': {
+    #             'SimplePrefix': {},
+    #             'PartitionedPrefix': {
+    #                 'PartitionDateSource': 'EventTime'|'DeliveryTime'
+    #             }
+    #         }
+    #     }
+    # }
+    # Or empty dict {} if logging is not enabled
+    if bucket_logging is None:
+        return None
+
+    logging_config = bucket_logging.get("LoggingEnabled", {})
+    if not logging_config:
+        return {
+            "bucket": bucket,
+            "logging_enabled": False,
+            "target_bucket": None,
+        }
+
+    return {
+        "bucket": bucket,
+        "logging_enabled": True,
+        "target_bucket": logging_config.get("TargetBucket"),
+    }
+
+
 @timeit
 def parse_notification_configuration(
     bucket: str, notification_config: Optional[Dict]
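
parse_bucket_logging is likewise a pure helper, so its three outcomes are easy to check directly. A short sketch (not part of the release) using hypothetical get_bucket_logging responses and bucket names:

from cartography.intel.aws.s3 import parse_bucket_logging

# API call failed entirely -> nothing to record for this bucket.
print(parse_bucket_logging("example-bucket", None))
# None

# Call succeeded but logging is not configured -> recorded as disabled.
print(parse_bucket_logging("example-bucket", {}))
# {'bucket': 'example-bucket', 'logging_enabled': False, 'target_bucket': None}

# Logging enabled -> the target bucket is captured.
print(parse_bucket_logging(
    "example-bucket",
    {"LoggingEnabled": {"TargetBucket": "example-log-bucket", "TargetPrefix": "logs/"}},
))
# {'bucket': 'example-bucket', 'logging_enabled': True, 'target_bucket': 'example-log-bucket'}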

cartography/intel/entra/__init__.py

@@ -1,14 +1,13 @@
 import asyncio
-import datetime
 import logging
-from traceback import TracebackException
-from typing import Awaitable
-from typing import Callable

 import neo4j

 from cartography.config import Config
-from cartography.intel.entra.resources import RESOURCE_FUNCTIONS
+from cartography.intel.entra.applications import sync_entra_applications
+from cartography.intel.entra.groups import sync_entra_groups
+from cartography.intel.entra.ou import sync_entra_ous
+from cartography.intel.entra.users import sync_entra_users
 from cartography.util import timeit

 logger = logging.getLogger(__name__)
@@ -40,46 +39,45 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
     }

     async def main() -> None:
-        failed_stages = []
-        exception_tracebacks = []
+        # Run user sync
+        await sync_entra_users(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )

-        async def run_stage(name: str, func: Callable[..., Awaitable[None]]) -> None:
-            try:
-                await func(
-                    neo4j_session,
-                    config.entra_tenant_id,
-                    config.entra_client_id,
-                    config.entra_client_secret,
-                    config.update_tag,
-                    common_job_parameters,
-                )
-            except Exception as e:
-                if config.entra_best_effort_mode:
-                    timestamp = datetime.datetime.now()
-                    failed_stages.append(name)
-                    exception_traceback = TracebackException.from_exception(e)
-                    traceback_string = "".join(exception_traceback.format())
-                    exception_tracebacks.append(
-                        f"{timestamp} - Exception for stage {name}\n{traceback_string}"
-                    )
-                    logger.warning(
-                        f"Caught exception syncing {name}. entra-best-effort-mode is on so we are continuing "
-                        "on to the next Entra sync. All exceptions will be aggregated and re-logged at the end of the sync.",
-                        exc_info=True,
-                    )
-                else:
-                    logger.error("Error during Entra sync", exc_info=True)
-                    raise
+        # Run group sync
+        await sync_entra_groups(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )

-        for name, func in RESOURCE_FUNCTIONS:
-            await run_stage(name, func)
+        # Run OU sync
+        await sync_entra_ous(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )

-        if failed_stages:
-            logger.error(
-                f"Entra sync failed for the following stages: {', '.join(failed_stages)}. "
-                "See the logs for more details.",
-            )
-            raise Exception("\n".join(exception_tracebacks))
+        # Run application sync
+        await sync_entra_applications(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )

-    # Execute all syncs in sequence
+    # Execute both syncs in sequence
     asyncio.run(main())

cartography/intel/entra/applications.py

@@ -172,11 +172,12 @@ async def get_app_role_assignments(
                 )
                 continue
             except Exception as e:
+                # Only catch truly unexpected errors - these should be rare
                 logger.error(
                     f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
                     exc_info=True,
                 )
-                raise
+                continue

     logger.info(f"Retrieved {len(assignments)} app role assignments total")
     return assignments

cartography/intel/entra/ou.py

@@ -43,7 +43,7 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
            current_request = None
        except Exception as e:
            logger.error(f"Failed to retrieve administrative units: {str(e)}")
-            raise
+            current_request = None

    return all_units


cartography/intel/github/__init__.py

@@ -3,7 +3,6 @@ import json
 import logging

 import neo4j
-from requests import exceptions

 import cartography.intel.github.repos
 import cartography.intel.github.teams
@@ -34,27 +33,24 @@ def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
     }
     # run sync for the provided github tokens
     for auth_data in auth_tokens["organization"]:
-        try:
-            cartography.intel.github.users.sync(
-                neo4j_session,
-                common_job_parameters,
-                auth_data["token"],
-                auth_data["url"],
-                auth_data["name"],
-            )
-            cartography.intel.github.repos.sync(
-                neo4j_session,
-                common_job_parameters,
-                auth_data["token"],
-                auth_data["url"],
-                auth_data["name"],
-            )
-            cartography.intel.github.teams.sync_github_teams(
-                neo4j_session,
-                common_job_parameters,
-                auth_data["token"],
-                auth_data["url"],
-                auth_data["name"],
-            )
-        except exceptions.RequestException as e:
-            logger.error("Could not complete request to the GitHub API: %s", e)
+        cartography.intel.github.users.sync(
+            neo4j_session,
+            common_job_parameters,
+            auth_data["token"],
+            auth_data["url"],
+            auth_data["name"],
+        )
+        cartography.intel.github.repos.sync(
+            neo4j_session,
+            common_job_parameters,
+            auth_data["token"],
+            auth_data["url"],
+            auth_data["name"],
+        )
+        cartography.intel.github.teams.sync_github_teams(
+            neo4j_session,
+            common_job_parameters,
+            auth_data["token"],
+            auth_data["url"],
+            auth_data["name"],
+        )