cartography 0.110.0rc1__py3-none-any.whl → 0.111.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +16 -3
- cartography/cli.py +0 -8
- cartography/config.py +0 -9
- cartography/data/indexes.cypher +0 -2
- cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
- cartography/graph/querybuilder.py +70 -0
- cartography/intel/aws/apigateway.py +111 -4
- cartography/intel/aws/cognito.py +201 -0
- cartography/intel/aws/ec2/vpc.py +140 -124
- cartography/intel/aws/ecs.py +7 -1
- cartography/intel/aws/eventbridge.py +73 -0
- cartography/intel/aws/glue.py +64 -0
- cartography/intel/aws/kms.py +13 -1
- cartography/intel/aws/rds.py +105 -0
- cartography/intel/aws/resources.py +2 -0
- cartography/intel/aws/route53.py +3 -1
- cartography/intel/aws/s3.py +104 -0
- cartography/intel/entra/__init__.py +41 -43
- cartography/intel/entra/applications.py +2 -1
- cartography/intel/entra/ou.py +1 -1
- cartography/intel/github/__init__.py +21 -25
- cartography/intel/github/repos.py +13 -38
- cartography/intel/kubernetes/__init__.py +4 -0
- cartography/intel/kubernetes/rbac.py +464 -0
- cartography/intel/kubernetes/util.py +17 -0
- cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
- cartography/models/aws/cognito/__init__.py +0 -0
- cartography/models/aws/cognito/identity_pool.py +70 -0
- cartography/models/aws/cognito/user_pool.py +47 -0
- cartography/models/aws/ec2/security_groups.py +1 -1
- cartography/models/aws/ec2/vpc.py +46 -0
- cartography/models/aws/ec2/vpc_cidr.py +102 -0
- cartography/models/aws/ecs/services.py +17 -0
- cartography/models/aws/ecs/tasks.py +1 -0
- cartography/models/aws/eventbridge/target.py +71 -0
- cartography/models/aws/glue/job.py +69 -0
- cartography/models/aws/rds/event_subscription.py +146 -0
- cartography/models/aws/route53/dnsrecord.py +21 -0
- cartography/models/github/dependencies.py +1 -2
- cartography/models/kubernetes/clusterrolebindings.py +98 -0
- cartography/models/kubernetes/clusterroles.py +52 -0
- cartography/models/kubernetes/rolebindings.py +119 -0
- cartography/models/kubernetes/roles.py +76 -0
- cartography/models/kubernetes/serviceaccounts.py +77 -0
- cartography/models/tailscale/device.py +1 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/METADATA +3 -3
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/RECORD +57 -43
- cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
- cartography/intel/entra/resources.py +0 -20
- /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
- /cartography/models/aws/{__init__.py → apigateway/__init__.py} +0 -0
- /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
- /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
- /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
- /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/WHEEL +0 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/top_level.txt +0 -0
cartography/intel/aws/rds.py
CHANGED
|
@@ -9,6 +9,7 @@ import neo4j
|
|
|
9
9
|
from cartography.client.core.tx import load
|
|
10
10
|
from cartography.graph.job import GraphJob
|
|
11
11
|
from cartography.models.aws.rds.cluster import RDSClusterSchema
|
|
12
|
+
from cartography.models.aws.rds.event_subscription import RDSEventSubscriptionSchema
|
|
12
13
|
from cartography.models.aws.rds.instance import RDSInstanceSchema
|
|
13
14
|
from cartography.models.aws.rds.snapshot import RDSSnapshotSchema
|
|
14
15
|
from cartography.models.aws.rds.subnet_group import DBSubnetGroupSchema
|
|
@@ -136,6 +137,38 @@ def load_rds_snapshots(
|
|
|
136
137
|
)
|
|
137
138
|
|
|
138
139
|
|
|
140
|
+
@timeit
@aws_handle_regions
def get_rds_event_subscription_data(
    boto3_session: boto3.session.Session,
    region: str,
) -> List[Dict[str, Any]]:
    """
    Collect the RDS event subscriptions returned for one region.

    Creates an "rds" client for ``region`` from ``boto3_session``, walks every
    page of the ``describe_event_subscriptions`` paginator, and flattens each
    page's ``EventSubscriptionsList`` into a single list, in page order.
    """
    rds_client = boto3_session.client("rds", region_name=region)
    pages = rds_client.get_paginator("describe_event_subscriptions").paginate()
    return [
        entry
        for page in pages
        for entry in page["EventSubscriptionsList"]
    ]
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@timeit
def load_rds_event_subscriptions(
    neo4j_session: neo4j.Session,
    data: List[Dict],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Write RDS event subscription records to the graph.

    Hands ``data`` to ``load`` together with an ``RDSEventSubscriptionSchema``
    instance, passing the update tag, region and AWS account id as the
    ``lastupdated``, ``Region`` and ``AWS_ID`` keyword arguments.
    """
    schema = RDSEventSubscriptionSchema()
    load(
        neo4j_session,
        schema,
        data,
        lastupdated=aws_update_tag,
        Region=region,
        AWS_ID=current_aws_account_id,
    )
|
|
170
|
+
|
|
171
|
+
|
|
139
172
|
def _validate_rds_endpoint(rds: Dict) -> Dict:
|
|
140
173
|
"""
|
|
141
174
|
Get Endpoint from RDS data structure. Log to debug if an Endpoint field does not exist.
|
|
@@ -292,6 +325,28 @@ def transform_rds_instances(
|
|
|
292
325
|
return instances
|
|
293
326
|
|
|
294
327
|
|
|
328
|
+
def transform_rds_event_subscriptions(data: List[Dict]) -> List[Dict]:
    """
    Reshape raw RDS event subscription dicts into the records given to the loader.

    For each input dict a new dict is produced that:
    - copies a fixed set of fields as-is (a missing key becomes None),
    - converts ``SubscriptionCreationTime`` through ``dict_value_to_str``,
    - exposes ``EventCategoriesList`` / ``SourceIdsList`` as
      ``event_categories`` / ``source_ids``, with empty or missing values
      collapsed to None,
    - carries a ``lastupdated`` placeholder of None.
    """
    # Fields copied verbatim; the order here fixes the key order of each record.
    passthrough_keys = (
        "CustSubscriptionId",
        "EventSubscriptionArn",
        "CustomerAwsId",
        "SnsTopicArn",
        "SourceType",
        "Status",
        "Enabled",
    )

    records = []
    for raw in data:
        record = {key: raw.get(key) for key in passthrough_keys}
        record["SubscriptionCreationTime"] = dict_value_to_str(
            raw, "SubscriptionCreationTime"
        )
        # `or None` turns an empty list into None as well as a missing key.
        record["event_categories"] = raw.get("EventCategoriesList") or None
        record["source_ids"] = raw.get("SourceIdsList") or None
        record["lastupdated"] = None  # This will be set by the loader
        records.append(record)
    return records
|
|
348
|
+
|
|
349
|
+
|
|
295
350
|
def transform_rds_subnet_groups(
|
|
296
351
|
data: List[Dict], region: str, current_aws_account_id: str
|
|
297
352
|
) -> List[Dict]:
|
|
@@ -412,6 +467,20 @@ def cleanup_rds_snapshots(
|
|
|
412
467
|
)
|
|
413
468
|
|
|
414
469
|
|
|
470
|
+
@timeit
def cleanup_rds_event_subscriptions(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict,
) -> None:
    """
    Delete RDS event subscriptions that the current sync run did not refresh.

    Builds a cleanup job from ``RDSEventSubscriptionSchema`` and
    ``common_job_parameters`` and runs it on ``neo4j_session``.
    """
    logger.debug("Running RDS event subscriptions cleanup job")
    cleanup_job = GraphJob.from_node_schema(
        RDSEventSubscriptionSchema(),
        common_job_parameters,
    )
    cleanup_job.run(neo4j_session)
|
|
482
|
+
|
|
483
|
+
|
|
415
484
|
@timeit
|
|
416
485
|
def sync_rds_clusters(
|
|
417
486
|
neo4j_session: neo4j.Session,
|
|
@@ -498,6 +567,32 @@ def sync_rds_snapshots(
|
|
|
498
567
|
cleanup_rds_snapshots(neo4j_session, common_job_parameters)
|
|
499
568
|
|
|
500
569
|
|
|
570
|
+
@timeit
def sync_rds_event_subscriptions(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Sync RDS event subscriptions: fetch per region, transform, load, then clean up.

    Each region in ``regions`` is fetched, transformed and loaded in turn.
    The cleanup job runs once, after all regions have been processed.
    """
    for current_region in regions:
        logger.info(
            "Syncing RDS event subscriptions for region '%s' in account '%s'.",
            current_region,
            current_aws_account_id,
        )
        raw_subscriptions = get_rds_event_subscription_data(
            boto3_session,
            current_region,
        )
        load_rds_event_subscriptions(
            neo4j_session,
            transform_rds_event_subscriptions(raw_subscriptions),
            current_region,
            current_aws_account_id,
            update_tag,
        )

    # Runs after the loop so stale nodes are judged against the full sync.
    cleanup_rds_event_subscriptions(neo4j_session, common_job_parameters)
|
|
594
|
+
|
|
595
|
+
|
|
501
596
|
@timeit
|
|
502
597
|
def sync(
|
|
503
598
|
neo4j_session: neo4j.Session,
|
|
@@ -531,6 +626,16 @@ def sync(
|
|
|
531
626
|
update_tag,
|
|
532
627
|
common_job_parameters,
|
|
533
628
|
)
|
|
629
|
+
|
|
630
|
+
sync_rds_event_subscriptions(
|
|
631
|
+
neo4j_session,
|
|
632
|
+
boto3_session,
|
|
633
|
+
regions,
|
|
634
|
+
current_aws_account_id,
|
|
635
|
+
update_tag,
|
|
636
|
+
common_job_parameters,
|
|
637
|
+
)
|
|
638
|
+
|
|
534
639
|
merge_module_sync_metadata(
|
|
535
640
|
neo4j_session,
|
|
536
641
|
group_type="AWSAccount",
|
|
@@ -9,6 +9,7 @@ from . import cloudtrail
|
|
|
9
9
|
from . import cloudtrail_management_events
|
|
10
10
|
from . import cloudwatch
|
|
11
11
|
from . import codebuild
|
|
12
|
+
from . import cognito
|
|
12
13
|
from . import config
|
|
13
14
|
from . import dynamodb
|
|
14
15
|
from . import ecr
|
|
@@ -116,6 +117,7 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
|
|
|
116
117
|
"efs": efs.sync,
|
|
117
118
|
"guardduty": guardduty.sync,
|
|
118
119
|
"codebuild": codebuild.sync,
|
|
120
|
+
"cognito": cognito.sync,
|
|
119
121
|
"eventbridge": eventbridge.sync,
|
|
120
122
|
"glue": glue.sync,
|
|
121
123
|
}
|
cartography/intel/aws/route53.py
CHANGED
|
@@ -398,7 +398,9 @@ def link_sub_zones(
|
|
|
398
398
|
MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(z:AWSDNSZone)
|
|
399
399
|
<-[:MEMBER_OF_DNS_ZONE]-(record:DNSRecord{type:"NS"})
|
|
400
400
|
-[:DNS_POINTS_TO]->(ns:NameServer)<-[:NAMESERVER]-(z2:AWSDNSZone)
|
|
401
|
-
|
|
401
|
+
WHERE record.name = z2.name AND
|
|
402
|
+
z2.name ENDS WITH '.' + z.name AND
|
|
403
|
+
NOT z = z2
|
|
402
404
|
RETURN z.id as zone_id, z2.id as subzone_id
|
|
403
405
|
"""
|
|
404
406
|
zone_to_subzone = neo4j_session.read_transaction(
|
cartography/intel/aws/s3.py
CHANGED
|
@@ -71,6 +71,7 @@ def get_s3_bucket_details(
|
|
|
71
71
|
Dict[str, Any],
|
|
72
72
|
Dict[str, Any],
|
|
73
73
|
Dict[str, Any],
|
|
74
|
+
Dict[str, Any],
|
|
74
75
|
]
|
|
75
76
|
|
|
76
77
|
async def _get_bucket_detail(bucket: Dict[str, Any]) -> BucketDetail:
|
|
@@ -88,6 +89,7 @@ def get_s3_bucket_details(
|
|
|
88
89
|
versioning,
|
|
89
90
|
public_access_block,
|
|
90
91
|
bucket_ownership_controls,
|
|
92
|
+
bucket_logging,
|
|
91
93
|
) = await asyncio.gather(
|
|
92
94
|
to_asynchronous(get_acl, bucket, client),
|
|
93
95
|
to_asynchronous(get_policy, bucket, client),
|
|
@@ -95,6 +97,7 @@ def get_s3_bucket_details(
|
|
|
95
97
|
to_asynchronous(get_versioning, bucket, client),
|
|
96
98
|
to_asynchronous(get_public_access_block, bucket, client),
|
|
97
99
|
to_asynchronous(get_bucket_ownership_controls, bucket, client),
|
|
100
|
+
to_asynchronous(get_bucket_logging, bucket, client),
|
|
98
101
|
)
|
|
99
102
|
return (
|
|
100
103
|
bucket["Name"],
|
|
@@ -104,6 +107,7 @@ def get_s3_bucket_details(
|
|
|
104
107
|
versioning,
|
|
105
108
|
public_access_block,
|
|
106
109
|
bucket_ownership_controls,
|
|
110
|
+
bucket_logging,
|
|
107
111
|
)
|
|
108
112
|
|
|
109
113
|
bucket_details = to_synchronous(
|
|
@@ -241,6 +245,29 @@ def get_bucket_ownership_controls(
|
|
|
241
245
|
return bucket_ownership_controls
|
|
242
246
|
|
|
243
247
|
|
|
248
|
+
@timeit
@aws_handle_regions
def get_bucket_logging(
    bucket: Dict, client: botocore.client.BaseClient
) -> Optional[Dict]:
    """
    Fetch the logging status configuration of one S3 bucket.

    Returns the ``get_bucket_logging`` response for ``bucket["Name"]``, or None
    when the call fails with a ClientError that ``_is_common_exception``
    accepts, or with an EndpointConnectionError (logged as a warning).
    Any other ClientError is re-raised.
    """
    try:
        return client.get_bucket_logging(Bucket=bucket["Name"])
    except ClientError as err:
        # Only errors recognised as "common" are tolerated; the rest propagate.
        if not _is_common_exception(err, bucket):
            raise
    except EndpointConnectionError:
        logger.warning(
            f"Failed to retrieve S3 bucket logging status for {bucket['Name']} - Could not connect to the endpoint URL",
        )
    return None
|
|
269
|
+
|
|
270
|
+
|
|
244
271
|
@timeit
|
|
245
272
|
def _is_common_exception(e: Exception, bucket: Dict) -> bool:
|
|
246
273
|
error_msg = "Failed to retrieve S3 bucket detail"
|
|
@@ -319,6 +346,7 @@ def _load_s3_acls(
|
|
|
319
346
|
"aws_s3acl_analysis.json",
|
|
320
347
|
neo4j_session,
|
|
321
348
|
{"AWS_ID": aws_account_id},
|
|
349
|
+
package="cartography.data.jobs.scoped_analysis",
|
|
322
350
|
)
|
|
323
351
|
|
|
324
352
|
|
|
@@ -479,6 +507,30 @@ def _load_bucket_ownership_controls(
|
|
|
479
507
|
)
|
|
480
508
|
|
|
481
509
|
|
|
510
|
+
@timeit
def _load_bucket_logging(
    neo4j_session: neo4j.Session,
    bucket_logging_configs: List[Dict],
    update_tag: int,
) -> None:
    """
    Store parsed S3 bucket logging status on existing S3Bucket nodes.

    Each entry of ``bucket_logging_configs`` is matched to an S3Bucket node by
    its ``bucket`` value (compared against the node's ``name``); the node's
    ``logging_enabled``, ``logging_target_bucket`` and ``lastupdated``
    properties are then set. The query uses MATCH, so entries without a
    matching node set nothing.
    """
    set_logging_query = """
    UNWIND $bucket_logging_configs AS bucket_logging
    MATCH (bucket:S3Bucket{name: bucket_logging.bucket})
    SET bucket.logging_enabled = bucket_logging.logging_enabled,
        bucket.logging_target_bucket = bucket_logging.target_bucket,
        bucket.lastupdated = $update_tag
    """
    neo4j_session.run(
        set_logging_query,
        bucket_logging_configs=bucket_logging_configs,
        update_tag=update_tag,
    )
|
|
532
|
+
|
|
533
|
+
|
|
482
534
|
def _set_default_values(neo4j_session: neo4j.Session, aws_account_id: str) -> None:
|
|
483
535
|
set_defaults = """
|
|
484
536
|
MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(s:S3Bucket) where s.anonymous_actions IS NULL
|
|
@@ -516,6 +568,7 @@ def load_s3_details(
|
|
|
516
568
|
versioning_configs: List[Dict] = []
|
|
517
569
|
public_access_block_configs: List[Dict] = []
|
|
518
570
|
bucket_ownership_controls_configs: List[Dict] = []
|
|
571
|
+
bucket_logging_configs: List[Dict] = []
|
|
519
572
|
for (
|
|
520
573
|
bucket,
|
|
521
574
|
acl,
|
|
@@ -524,6 +577,7 @@ def load_s3_details(
|
|
|
524
577
|
versioning,
|
|
525
578
|
public_access_block,
|
|
526
579
|
bucket_ownership_controls,
|
|
580
|
+
bucket_logging,
|
|
527
581
|
) in s3_details_iter:
|
|
528
582
|
parsed_acls = parse_acl(acl, bucket, aws_account_id)
|
|
529
583
|
if parsed_acls is not None:
|
|
@@ -551,6 +605,9 @@ def load_s3_details(
|
|
|
551
605
|
)
|
|
552
606
|
if parsed_bucket_ownership_controls is not None:
|
|
553
607
|
bucket_ownership_controls_configs.append(parsed_bucket_ownership_controls)
|
|
608
|
+
parsed_bucket_logging = parse_bucket_logging(bucket, bucket_logging)
|
|
609
|
+
if parsed_bucket_logging is not None:
|
|
610
|
+
bucket_logging_configs.append(parsed_bucket_logging)
|
|
554
611
|
|
|
555
612
|
# cleanup existing policy properties set on S3 Buckets
|
|
556
613
|
run_cleanup_job(
|
|
@@ -569,6 +626,7 @@ def load_s3_details(
|
|
|
569
626
|
_load_bucket_ownership_controls(
|
|
570
627
|
neo4j_session, bucket_ownership_controls_configs, update_tag
|
|
571
628
|
)
|
|
629
|
+
_load_bucket_logging(neo4j_session, bucket_logging_configs, update_tag)
|
|
572
630
|
_set_default_values(neo4j_session, aws_account_id)
|
|
573
631
|
|
|
574
632
|
|
|
@@ -851,6 +909,52 @@ def parse_bucket_ownership_controls(
|
|
|
851
909
|
}
|
|
852
910
|
|
|
853
911
|
|
|
912
|
+
def parse_bucket_logging(bucket: str, bucket_logging: Optional[Dict]) -> Optional[Dict]:
    """
    Reduce an S3 bucket logging status response to the fields stored on the bucket.

    The logging status object looks like::

        {
            'LoggingEnabled': {
                'TargetBucket': 'string',
                'TargetGrants': [
                    {
                        'Grantee': {
                            'DisplayName': 'string',
                            'EmailAddress': 'string',
                            'ID': 'string',
                            'Type': 'CanonicalUser'|'AmazonCustomerByEmail'|'Group',
                            'URI': 'string'
                        },
                        'Permission': 'FULL_CONTROL'|'READ'|'WRITE'
                    },
                ],
                'TargetPrefix': 'string',
                'TargetObjectKeyFormat': {
                    'SimplePrefix': {},
                    'PartitionedPrefix': {
                        'PartitionDateSource': 'EventTime'|'DeliveryTime'
                    }
                }
            }
        }

    or is an empty dict ``{}`` when logging is not enabled.

    Returns None when ``bucket_logging`` is None. Otherwise returns a dict with
    ``bucket``, ``logging_enabled`` and ``target_bucket``; ``target_bucket`` is
    None whenever ``LoggingEnabled`` is missing or empty.
    """
    if bucket_logging is None:
        return None

    enabled_section = bucket_logging.get("LoggingEnabled", {})
    is_enabled = bool(enabled_section)
    return {
        "bucket": bucket,
        "logging_enabled": is_enabled,
        # Only consult the section when it is non-empty (it may be None).
        "target_bucket": enabled_section.get("TargetBucket") if is_enabled else None,
    }
|
|
956
|
+
|
|
957
|
+
|
|
854
958
|
@timeit
|
|
855
959
|
def parse_notification_configuration(
|
|
856
960
|
bucket: str, notification_config: Optional[Dict]
|
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
import datetime
|
|
3
2
|
import logging
|
|
4
|
-
from traceback import TracebackException
|
|
5
|
-
from typing import Awaitable
|
|
6
|
-
from typing import Callable
|
|
7
3
|
|
|
8
4
|
import neo4j
|
|
9
5
|
|
|
10
6
|
from cartography.config import Config
|
|
11
|
-
from cartography.intel.entra.
|
|
7
|
+
from cartography.intel.entra.applications import sync_entra_applications
|
|
8
|
+
from cartography.intel.entra.groups import sync_entra_groups
|
|
9
|
+
from cartography.intel.entra.ou import sync_entra_ous
|
|
10
|
+
from cartography.intel.entra.users import sync_entra_users
|
|
12
11
|
from cartography.util import timeit
|
|
13
12
|
|
|
14
13
|
logger = logging.getLogger(__name__)
|
|
@@ -40,46 +39,45 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
|
|
|
40
39
|
}
|
|
41
40
|
|
|
42
41
|
async def main() -> None:
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
# Run user sync
|
|
43
|
+
await sync_entra_users(
|
|
44
|
+
neo4j_session,
|
|
45
|
+
config.entra_tenant_id,
|
|
46
|
+
config.entra_client_id,
|
|
47
|
+
config.entra_client_secret,
|
|
48
|
+
config.update_tag,
|
|
49
|
+
common_job_parameters,
|
|
50
|
+
)
|
|
45
51
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
)
|
|
56
|
-
except Exception as e:
|
|
57
|
-
if config.entra_best_effort_mode:
|
|
58
|
-
timestamp = datetime.datetime.now()
|
|
59
|
-
failed_stages.append(name)
|
|
60
|
-
exception_traceback = TracebackException.from_exception(e)
|
|
61
|
-
traceback_string = "".join(exception_traceback.format())
|
|
62
|
-
exception_tracebacks.append(
|
|
63
|
-
f"{timestamp} - Exception for stage {name}\n{traceback_string}"
|
|
64
|
-
)
|
|
65
|
-
logger.warning(
|
|
66
|
-
f"Caught exception syncing {name}. entra-best-effort-mode is on so we are continuing "
|
|
67
|
-
"on to the next Entra sync. All exceptions will be aggregated and re-logged at the end of the sync.",
|
|
68
|
-
exc_info=True,
|
|
69
|
-
)
|
|
70
|
-
else:
|
|
71
|
-
logger.error("Error during Entra sync", exc_info=True)
|
|
72
|
-
raise
|
|
52
|
+
# Run group sync
|
|
53
|
+
await sync_entra_groups(
|
|
54
|
+
neo4j_session,
|
|
55
|
+
config.entra_tenant_id,
|
|
56
|
+
config.entra_client_id,
|
|
57
|
+
config.entra_client_secret,
|
|
58
|
+
config.update_tag,
|
|
59
|
+
common_job_parameters,
|
|
60
|
+
)
|
|
73
61
|
|
|
74
|
-
|
|
75
|
-
|
|
62
|
+
# Run OU sync
|
|
63
|
+
await sync_entra_ous(
|
|
64
|
+
neo4j_session,
|
|
65
|
+
config.entra_tenant_id,
|
|
66
|
+
config.entra_client_id,
|
|
67
|
+
config.entra_client_secret,
|
|
68
|
+
config.update_tag,
|
|
69
|
+
common_job_parameters,
|
|
70
|
+
)
|
|
76
71
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
72
|
+
# Run application sync
|
|
73
|
+
await sync_entra_applications(
|
|
74
|
+
neo4j_session,
|
|
75
|
+
config.entra_tenant_id,
|
|
76
|
+
config.entra_client_id,
|
|
77
|
+
config.entra_client_secret,
|
|
78
|
+
config.update_tag,
|
|
79
|
+
common_job_parameters,
|
|
80
|
+
)
|
|
83
81
|
|
|
84
|
-
# Execute
|
|
82
|
+
# Execute both syncs in sequence
|
|
85
83
|
asyncio.run(main())
|
|
@@ -172,11 +172,12 @@ async def get_app_role_assignments(
|
|
|
172
172
|
)
|
|
173
173
|
continue
|
|
174
174
|
except Exception as e:
|
|
175
|
+
# Only catch truly unexpected errors - these should be rare
|
|
175
176
|
logger.error(
|
|
176
177
|
f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
|
|
177
178
|
exc_info=True,
|
|
178
179
|
)
|
|
179
|
-
|
|
180
|
+
continue
|
|
180
181
|
|
|
181
182
|
logger.info(f"Retrieved {len(assignments)} app role assignments total")
|
|
182
183
|
return assignments
|
cartography/intel/entra/ou.py
CHANGED
|
@@ -43,7 +43,7 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
|
|
|
43
43
|
current_request = None
|
|
44
44
|
except Exception as e:
|
|
45
45
|
logger.error(f"Failed to retrieve administrative units: {str(e)}")
|
|
46
|
-
|
|
46
|
+
current_request = None
|
|
47
47
|
|
|
48
48
|
return all_units
|
|
49
49
|
|
|
@@ -3,7 +3,6 @@ import json
|
|
|
3
3
|
import logging
|
|
4
4
|
|
|
5
5
|
import neo4j
|
|
6
|
-
from requests import exceptions
|
|
7
6
|
|
|
8
7
|
import cartography.intel.github.repos
|
|
9
8
|
import cartography.intel.github.teams
|
|
@@ -34,27 +33,24 @@ def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
|
|
|
34
33
|
}
|
|
35
34
|
# run sync for the provided github tokens
|
|
36
35
|
for auth_data in auth_tokens["organization"]:
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
)
|
|
59
|
-
except exceptions.RequestException as e:
|
|
60
|
-
logger.error("Could not complete request to the GitHub API: %s", e)
|
|
36
|
+
cartography.intel.github.users.sync(
|
|
37
|
+
neo4j_session,
|
|
38
|
+
common_job_parameters,
|
|
39
|
+
auth_data["token"],
|
|
40
|
+
auth_data["url"],
|
|
41
|
+
auth_data["name"],
|
|
42
|
+
)
|
|
43
|
+
cartography.intel.github.repos.sync(
|
|
44
|
+
neo4j_session,
|
|
45
|
+
common_job_parameters,
|
|
46
|
+
auth_data["token"],
|
|
47
|
+
auth_data["url"],
|
|
48
|
+
auth_data["name"],
|
|
49
|
+
)
|
|
50
|
+
cartography.intel.github.teams.sync_github_teams(
|
|
51
|
+
neo4j_session,
|
|
52
|
+
common_job_parameters,
|
|
53
|
+
auth_data["token"],
|
|
54
|
+
auth_data["url"],
|
|
55
|
+
auth_data["name"],
|
|
56
|
+
)
|
|
@@ -405,9 +405,16 @@ def _create_default_branch_id(repo_url: str, default_branch_ref_id: str) -> str:
|
|
|
405
405
|
|
|
406
406
|
def _create_git_url_from_ssh_url(ssh_url: str) -> str:
|
|
407
407
|
"""
|
|
408
|
-
|
|
408
|
+
Convert SSH URL to git:// URL.
|
|
409
|
+
Example:
|
|
410
|
+
git@github.com:cartography-cncf/cartography.git
|
|
411
|
+
-> git://github.com/cartography-cncf/cartography.git
|
|
409
412
|
"""
|
|
410
|
-
|
|
413
|
+
# Remove the user part (e.g., "git@")
|
|
414
|
+
_, host_and_path = ssh_url.split("@", 1)
|
|
415
|
+
# Replace first ':' (separating host and repo) with '/'
|
|
416
|
+
host, path = host_and_path.split(":", 1)
|
|
417
|
+
return f"git://{host}/{path}"
|
|
411
418
|
|
|
412
419
|
|
|
413
420
|
def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict]) -> None:
|
|
@@ -647,9 +654,6 @@ def _transform_dependency_graph(
|
|
|
647
654
|
requirements = dep.get("requirements", "")
|
|
648
655
|
package_manager = dep.get("packageManager", "").upper()
|
|
649
656
|
|
|
650
|
-
# Extract version from requirements string if available
|
|
651
|
-
pinned_version = _extract_version_from_requirements(requirements)
|
|
652
|
-
|
|
653
657
|
# Create ecosystem-specific canonical name
|
|
654
658
|
canonical_name = _canonicalize_dependency_name(
|
|
655
659
|
package_name, package_manager
|
|
@@ -658,11 +662,12 @@ def _transform_dependency_graph(
|
|
|
658
662
|
# Create ecosystem identifier
|
|
659
663
|
ecosystem = package_manager.lower() if package_manager else "unknown"
|
|
660
664
|
|
|
661
|
-
# Create simple dependency ID using canonical name and
|
|
665
|
+
# Create simple dependency ID using canonical name and requirements
|
|
662
666
|
# This allows the same dependency to be shared across multiple repos
|
|
667
|
+
requirements_for_id = (requirements or "").strip()
|
|
663
668
|
dependency_id = (
|
|
664
|
-
f"{canonical_name}|{
|
|
665
|
-
if
|
|
669
|
+
f"{canonical_name}|{requirements_for_id}"
|
|
670
|
+
if requirements_for_id
|
|
666
671
|
else canonical_name
|
|
667
672
|
)
|
|
668
673
|
|
|
@@ -677,15 +682,12 @@ def _transform_dependency_graph(
|
|
|
677
682
|
"id": dependency_id,
|
|
678
683
|
"name": canonical_name,
|
|
679
684
|
"original_name": package_name, # Keep original for reference
|
|
680
|
-
"version": pinned_version,
|
|
681
685
|
"requirements": normalized_requirements,
|
|
682
686
|
"ecosystem": ecosystem,
|
|
683
687
|
"package_manager": package_manager,
|
|
684
688
|
"manifest_path": manifest_path,
|
|
685
689
|
"manifest_id": manifest_id,
|
|
686
690
|
"repo_url": repo_url,
|
|
687
|
-
# Add separate fields for easier querying
|
|
688
|
-
"repo_name": repo_url.split("/")[-1] if repo_url else "",
|
|
689
691
|
"manifest_file": (
|
|
690
692
|
manifest_path.split("/")[-1] if manifest_path else ""
|
|
691
693
|
),
|
|
@@ -698,33 +700,6 @@ def _transform_dependency_graph(
|
|
|
698
700
|
logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
|
|
699
701
|
|
|
700
702
|
|
|
701
|
-
def _extract_version_from_requirements(requirements: Optional[str]) -> Optional[str]:
|
|
702
|
-
"""
|
|
703
|
-
Extract a pinned version from a requirements string if it exists.
|
|
704
|
-
Examples: "1.2.3" -> "1.2.3", "^1.2.3" -> None, ">=1.0,<2.0" -> None
|
|
705
|
-
"""
|
|
706
|
-
if not requirements or not requirements.strip():
|
|
707
|
-
return None
|
|
708
|
-
|
|
709
|
-
# Handle exact version specifications (no operators)
|
|
710
|
-
if requirements and not any(
|
|
711
|
-
op in requirements for op in ["^", "~", ">", "<", "=", "*"]
|
|
712
|
-
):
|
|
713
|
-
stripped = requirements.strip()
|
|
714
|
-
return stripped if stripped else None
|
|
715
|
-
|
|
716
|
-
# Handle == specifications
|
|
717
|
-
if "==" in requirements:
|
|
718
|
-
parts = requirements.split("==")
|
|
719
|
-
if len(parts) == 2:
|
|
720
|
-
version = parts[1].strip()
|
|
721
|
-
# Remove any trailing constraints
|
|
722
|
-
version = version.split(",")[0].split(" ")[0]
|
|
723
|
-
return version if version else None
|
|
724
|
-
|
|
725
|
-
return None
|
|
726
|
-
|
|
727
|
-
|
|
728
703
|
def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
|
|
729
704
|
"""
|
|
730
705
|
Canonicalize dependency names based on ecosystem conventions.
|
|
@@ -6,6 +6,7 @@ from cartography.config import Config
|
|
|
6
6
|
from cartography.intel.kubernetes.clusters import sync_kubernetes_cluster
|
|
7
7
|
from cartography.intel.kubernetes.namespaces import sync_namespaces
|
|
8
8
|
from cartography.intel.kubernetes.pods import sync_pods
|
|
9
|
+
from cartography.intel.kubernetes.rbac import sync_kubernetes_rbac
|
|
9
10
|
from cartography.intel.kubernetes.secrets import sync_secrets
|
|
10
11
|
from cartography.intel.kubernetes.services import sync_services
|
|
11
12
|
from cartography.intel.kubernetes.util import get_k8s_clients
|
|
@@ -38,6 +39,9 @@ def start_k8s_ingestion(session: Session, config: Config) -> None:
|
|
|
38
39
|
common_job_parameters["CLUSTER_ID"] = cluster_info.get("id")
|
|
39
40
|
|
|
40
41
|
sync_namespaces(session, client, config.update_tag, common_job_parameters)
|
|
42
|
+
sync_kubernetes_rbac(
|
|
43
|
+
session, client, config.update_tag, common_job_parameters
|
|
44
|
+
)
|
|
41
45
|
all_pods = sync_pods(
|
|
42
46
|
session,
|
|
43
47
|
client,
|