cartography 0.110.0rc1__py3-none-any.whl → 0.111.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. (More details were linked from the original page; the link is not preserved in this text.)

Files changed (59)
  1. cartography/_version.py +16 -3
  2. cartography/cli.py +0 -8
  3. cartography/config.py +0 -9
  4. cartography/data/indexes.cypher +0 -2
  5. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  6. cartography/graph/querybuilder.py +70 -0
  7. cartography/intel/aws/apigateway.py +111 -4
  8. cartography/intel/aws/cognito.py +201 -0
  9. cartography/intel/aws/ec2/vpc.py +140 -124
  10. cartography/intel/aws/ecs.py +7 -1
  11. cartography/intel/aws/eventbridge.py +73 -0
  12. cartography/intel/aws/glue.py +64 -0
  13. cartography/intel/aws/kms.py +13 -1
  14. cartography/intel/aws/rds.py +105 -0
  15. cartography/intel/aws/resources.py +2 -0
  16. cartography/intel/aws/route53.py +3 -1
  17. cartography/intel/aws/s3.py +104 -0
  18. cartography/intel/entra/__init__.py +41 -43
  19. cartography/intel/entra/applications.py +2 -1
  20. cartography/intel/entra/ou.py +1 -1
  21. cartography/intel/github/__init__.py +21 -25
  22. cartography/intel/github/repos.py +13 -38
  23. cartography/intel/kubernetes/__init__.py +4 -0
  24. cartography/intel/kubernetes/rbac.py +464 -0
  25. cartography/intel/kubernetes/util.py +17 -0
  26. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  27. cartography/models/aws/cognito/__init__.py +0 -0
  28. cartography/models/aws/cognito/identity_pool.py +70 -0
  29. cartography/models/aws/cognito/user_pool.py +47 -0
  30. cartography/models/aws/ec2/security_groups.py +1 -1
  31. cartography/models/aws/ec2/vpc.py +46 -0
  32. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  33. cartography/models/aws/ecs/services.py +17 -0
  34. cartography/models/aws/ecs/tasks.py +1 -0
  35. cartography/models/aws/eventbridge/target.py +71 -0
  36. cartography/models/aws/glue/job.py +69 -0
  37. cartography/models/aws/rds/event_subscription.py +146 -0
  38. cartography/models/aws/route53/dnsrecord.py +21 -0
  39. cartography/models/github/dependencies.py +1 -2
  40. cartography/models/kubernetes/clusterrolebindings.py +98 -0
  41. cartography/models/kubernetes/clusterroles.py +52 -0
  42. cartography/models/kubernetes/rolebindings.py +119 -0
  43. cartography/models/kubernetes/roles.py +76 -0
  44. cartography/models/kubernetes/serviceaccounts.py +77 -0
  45. cartography/models/tailscale/device.py +1 -0
  46. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/METADATA +3 -3
  47. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/RECORD +57 -43
  48. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  49. cartography/intel/entra/resources.py +0 -20
  50. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  51. /cartography/models/aws/{__init__.py → apigateway/__init__.py} +0 -0
  52. /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
  53. /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
  54. /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
  55. /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
  56. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/WHEEL +0 -0
  57. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/entry_points.txt +0 -0
  58. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/licenses/LICENSE +0 -0
  59. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ import neo4j
9
9
  from cartography.client.core.tx import load
10
10
  from cartography.graph.job import GraphJob
11
11
  from cartography.models.aws.rds.cluster import RDSClusterSchema
12
+ from cartography.models.aws.rds.event_subscription import RDSEventSubscriptionSchema
12
13
  from cartography.models.aws.rds.instance import RDSInstanceSchema
13
14
  from cartography.models.aws.rds.snapshot import RDSSnapshotSchema
14
15
  from cartography.models.aws.rds.subnet_group import DBSubnetGroupSchema
@@ -136,6 +137,38 @@ def load_rds_snapshots(
136
137
  )
137
138
 
138
139
 
140
+ @timeit
141
+ @aws_handle_regions
142
+ def get_rds_event_subscription_data(
143
+ boto3_session: boto3.session.Session,
144
+ region: str,
145
+ ) -> List[Dict[str, Any]]:
146
+ client = boto3_session.client("rds", region_name=region)
147
+ paginator = client.get_paginator("describe_event_subscriptions")
148
+ subscriptions = []
149
+ for page in paginator.paginate():
150
+ subscriptions.extend(page["EventSubscriptionsList"])
151
+ return subscriptions
152
+
153
+
154
+ @timeit
155
+ def load_rds_event_subscriptions(
156
+ neo4j_session: neo4j.Session,
157
+ data: List[Dict],
158
+ region: str,
159
+ current_aws_account_id: str,
160
+ aws_update_tag: int,
161
+ ) -> None:
162
+ load(
163
+ neo4j_session,
164
+ RDSEventSubscriptionSchema(),
165
+ data,
166
+ lastupdated=aws_update_tag,
167
+ Region=region,
168
+ AWS_ID=current_aws_account_id,
169
+ )
170
+
171
+
139
172
  def _validate_rds_endpoint(rds: Dict) -> Dict:
140
173
  """
141
174
  Get Endpoint from RDS data structure. Log to debug if an Endpoint field does not exist.
@@ -292,6 +325,28 @@ def transform_rds_instances(
292
325
  return instances
293
326
 
294
327
 
328
+ def transform_rds_event_subscriptions(data: List[Dict]) -> List[Dict]:
329
+ subscriptions = []
330
+ for subscription in data:
331
+ transformed = {
332
+ "CustSubscriptionId": subscription.get("CustSubscriptionId"),
333
+ "EventSubscriptionArn": subscription.get("EventSubscriptionArn"),
334
+ "CustomerAwsId": subscription.get("CustomerAwsId"),
335
+ "SnsTopicArn": subscription.get("SnsTopicArn"),
336
+ "SourceType": subscription.get("SourceType"),
337
+ "Status": subscription.get("Status"),
338
+ "Enabled": subscription.get("Enabled"),
339
+ "SubscriptionCreationTime": dict_value_to_str(
340
+ subscription, "SubscriptionCreationTime"
341
+ ),
342
+ "event_categories": subscription.get("EventCategoriesList") or None,
343
+ "source_ids": subscription.get("SourceIdsList") or None,
344
+ "lastupdated": None, # This will be set by the loader
345
+ }
346
+ subscriptions.append(transformed)
347
+ return subscriptions
348
+
349
+
295
350
  def transform_rds_subnet_groups(
296
351
  data: List[Dict], region: str, current_aws_account_id: str
297
352
  ) -> List[Dict]:
@@ -412,6 +467,20 @@ def cleanup_rds_snapshots(
412
467
  )
413
468
 
414
469
 
470
+ @timeit
471
+ def cleanup_rds_event_subscriptions(
472
+ neo4j_session: neo4j.Session,
473
+ common_job_parameters: Dict,
474
+ ) -> None:
475
+ """
476
+ Remove RDS event subscriptions that weren't updated in this sync run
477
+ """
478
+ logger.debug("Running RDS event subscriptions cleanup job")
479
+ GraphJob.from_node_schema(RDSEventSubscriptionSchema(), common_job_parameters).run(
480
+ neo4j_session
481
+ )
482
+
483
+
415
484
  @timeit
416
485
  def sync_rds_clusters(
417
486
  neo4j_session: neo4j.Session,
@@ -498,6 +567,32 @@ def sync_rds_snapshots(
498
567
  cleanup_rds_snapshots(neo4j_session, common_job_parameters)
499
568
 
500
569
 
570
+ @timeit
571
+ def sync_rds_event_subscriptions(
572
+ neo4j_session: neo4j.Session,
573
+ boto3_session: boto3.session.Session,
574
+ regions: List[str],
575
+ current_aws_account_id: str,
576
+ update_tag: int,
577
+ common_job_parameters: Dict,
578
+ ) -> None:
579
+ """
580
+ Grab RDS event subscription data from AWS, ingest to neo4j, and run the cleanup job.
581
+ """
582
+ for region in regions:
583
+ logger.info(
584
+ "Syncing RDS event subscriptions for region '%s' in account '%s'.",
585
+ region,
586
+ current_aws_account_id,
587
+ )
588
+ data = get_rds_event_subscription_data(boto3_session, region)
589
+ transformed = transform_rds_event_subscriptions(data)
590
+ load_rds_event_subscriptions(
591
+ neo4j_session, transformed, region, current_aws_account_id, update_tag
592
+ )
593
+ cleanup_rds_event_subscriptions(neo4j_session, common_job_parameters)
594
+
595
+
501
596
  @timeit
502
597
  def sync(
503
598
  neo4j_session: neo4j.Session,
@@ -531,6 +626,16 @@ def sync(
531
626
  update_tag,
532
627
  common_job_parameters,
533
628
  )
629
+
630
+ sync_rds_event_subscriptions(
631
+ neo4j_session,
632
+ boto3_session,
633
+ regions,
634
+ current_aws_account_id,
635
+ update_tag,
636
+ common_job_parameters,
637
+ )
638
+
534
639
  merge_module_sync_metadata(
535
640
  neo4j_session,
536
641
  group_type="AWSAccount",
@@ -9,6 +9,7 @@ from . import cloudtrail
9
9
  from . import cloudtrail_management_events
10
10
  from . import cloudwatch
11
11
  from . import codebuild
12
+ from . import cognito
12
13
  from . import config
13
14
  from . import dynamodb
14
15
  from . import ecr
@@ -116,6 +117,7 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
116
117
  "efs": efs.sync,
117
118
  "guardduty": guardduty.sync,
118
119
  "codebuild": codebuild.sync,
120
+ "cognito": cognito.sync,
119
121
  "eventbridge": eventbridge.sync,
120
122
  "glue": glue.sync,
121
123
  }
@@ -398,7 +398,9 @@ def link_sub_zones(
398
398
  MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(z:AWSDNSZone)
399
399
  <-[:MEMBER_OF_DNS_ZONE]-(record:DNSRecord{type:"NS"})
400
400
  -[:DNS_POINTS_TO]->(ns:NameServer)<-[:NAMESERVER]-(z2:AWSDNSZone)
401
- WHERE record.name=z2.name AND NOT z=z2
401
+ WHERE record.name = z2.name AND
402
+ z2.name ENDS WITH '.' + z.name AND
403
+ NOT z = z2
402
404
  RETURN z.id as zone_id, z2.id as subzone_id
403
405
  """
404
406
  zone_to_subzone = neo4j_session.read_transaction(
@@ -71,6 +71,7 @@ def get_s3_bucket_details(
71
71
  Dict[str, Any],
72
72
  Dict[str, Any],
73
73
  Dict[str, Any],
74
+ Dict[str, Any],
74
75
  ]
75
76
 
76
77
  async def _get_bucket_detail(bucket: Dict[str, Any]) -> BucketDetail:
@@ -88,6 +89,7 @@ def get_s3_bucket_details(
88
89
  versioning,
89
90
  public_access_block,
90
91
  bucket_ownership_controls,
92
+ bucket_logging,
91
93
  ) = await asyncio.gather(
92
94
  to_asynchronous(get_acl, bucket, client),
93
95
  to_asynchronous(get_policy, bucket, client),
@@ -95,6 +97,7 @@ def get_s3_bucket_details(
95
97
  to_asynchronous(get_versioning, bucket, client),
96
98
  to_asynchronous(get_public_access_block, bucket, client),
97
99
  to_asynchronous(get_bucket_ownership_controls, bucket, client),
100
+ to_asynchronous(get_bucket_logging, bucket, client),
98
101
  )
99
102
  return (
100
103
  bucket["Name"],
@@ -104,6 +107,7 @@ def get_s3_bucket_details(
104
107
  versioning,
105
108
  public_access_block,
106
109
  bucket_ownership_controls,
110
+ bucket_logging,
107
111
  )
108
112
 
109
113
  bucket_details = to_synchronous(
@@ -241,6 +245,29 @@ def get_bucket_ownership_controls(
241
245
  return bucket_ownership_controls
242
246
 
243
247
 
248
+ @timeit
249
+ @aws_handle_regions
250
+ def get_bucket_logging(
251
+ bucket: Dict, client: botocore.client.BaseClient
252
+ ) -> Optional[Dict]:
253
+ """
254
+ Gets the S3 bucket logging status configuration.
255
+ """
256
+ bucket_logging = None
257
+ try:
258
+ bucket_logging = client.get_bucket_logging(Bucket=bucket["Name"])
259
+ except ClientError as e:
260
+ if _is_common_exception(e, bucket):
261
+ pass
262
+ else:
263
+ raise
264
+ except EndpointConnectionError:
265
+ logger.warning(
266
+ f"Failed to retrieve S3 bucket logging status for {bucket['Name']} - Could not connect to the endpoint URL",
267
+ )
268
+ return bucket_logging
269
+
270
+
244
271
  @timeit
245
272
  def _is_common_exception(e: Exception, bucket: Dict) -> bool:
246
273
  error_msg = "Failed to retrieve S3 bucket detail"
@@ -319,6 +346,7 @@ def _load_s3_acls(
319
346
  "aws_s3acl_analysis.json",
320
347
  neo4j_session,
321
348
  {"AWS_ID": aws_account_id},
349
+ package="cartography.data.jobs.scoped_analysis",
322
350
  )
323
351
 
324
352
 
@@ -479,6 +507,30 @@ def _load_bucket_ownership_controls(
479
507
  )
480
508
 
481
509
 
510
+ @timeit
511
+ def _load_bucket_logging(
512
+ neo4j_session: neo4j.Session,
513
+ bucket_logging_configs: List[Dict],
514
+ update_tag: int,
515
+ ) -> None:
516
+ """
517
+ Ingest S3 bucket logging status configuration into neo4j.
518
+ """
519
+ # Load basic logging status
520
+ ingest_bucket_logging = """
521
+ UNWIND $bucket_logging_configs AS bucket_logging
522
+ MATCH (bucket:S3Bucket{name: bucket_logging.bucket})
523
+ SET bucket.logging_enabled = bucket_logging.logging_enabled,
524
+ bucket.logging_target_bucket = bucket_logging.target_bucket,
525
+ bucket.lastupdated = $update_tag
526
+ """
527
+ neo4j_session.run(
528
+ ingest_bucket_logging,
529
+ bucket_logging_configs=bucket_logging_configs,
530
+ update_tag=update_tag,
531
+ )
532
+
533
+
482
534
  def _set_default_values(neo4j_session: neo4j.Session, aws_account_id: str) -> None:
483
535
  set_defaults = """
484
536
  MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(s:S3Bucket) where s.anonymous_actions IS NULL
@@ -516,6 +568,7 @@ def load_s3_details(
516
568
  versioning_configs: List[Dict] = []
517
569
  public_access_block_configs: List[Dict] = []
518
570
  bucket_ownership_controls_configs: List[Dict] = []
571
+ bucket_logging_configs: List[Dict] = []
519
572
  for (
520
573
  bucket,
521
574
  acl,
@@ -524,6 +577,7 @@ def load_s3_details(
524
577
  versioning,
525
578
  public_access_block,
526
579
  bucket_ownership_controls,
580
+ bucket_logging,
527
581
  ) in s3_details_iter:
528
582
  parsed_acls = parse_acl(acl, bucket, aws_account_id)
529
583
  if parsed_acls is not None:
@@ -551,6 +605,9 @@ def load_s3_details(
551
605
  )
552
606
  if parsed_bucket_ownership_controls is not None:
553
607
  bucket_ownership_controls_configs.append(parsed_bucket_ownership_controls)
608
+ parsed_bucket_logging = parse_bucket_logging(bucket, bucket_logging)
609
+ if parsed_bucket_logging is not None:
610
+ bucket_logging_configs.append(parsed_bucket_logging)
554
611
 
555
612
  # cleanup existing policy properties set on S3 Buckets
556
613
  run_cleanup_job(
@@ -569,6 +626,7 @@ def load_s3_details(
569
626
  _load_bucket_ownership_controls(
570
627
  neo4j_session, bucket_ownership_controls_configs, update_tag
571
628
  )
629
+ _load_bucket_logging(neo4j_session, bucket_logging_configs, update_tag)
572
630
  _set_default_values(neo4j_session, aws_account_id)
573
631
 
574
632
 
@@ -851,6 +909,52 @@ def parse_bucket_ownership_controls(
851
909
  }
852
910
 
853
911
 
912
+ def parse_bucket_logging(bucket: str, bucket_logging: Optional[Dict]) -> Optional[Dict]:
913
+ """Parses the S3 bucket logging status configuration and returns a dict of the relevant data"""
914
+ # Logging status object JSON looks like:
915
+ # {
916
+ # 'LoggingEnabled': {
917
+ # 'TargetBucket': 'string',
918
+ # 'TargetGrants': [
919
+ # {
920
+ # 'Grantee': {
921
+ # 'DisplayName': 'string',
922
+ # 'EmailAddress': 'string',
923
+ # 'ID': 'string',
924
+ # 'Type': 'CanonicalUser'|'AmazonCustomerByEmail'|'Group',
925
+ # 'URI': 'string'
926
+ # },
927
+ # 'Permission': 'FULL_CONTROL'|'READ'|'WRITE'
928
+ # },
929
+ # ],
930
+ # 'TargetPrefix': 'string',
931
+ # 'TargetObjectKeyFormat': {
932
+ # 'SimplePrefix': {},
933
+ # 'PartitionedPrefix': {
934
+ # 'PartitionDateSource': 'EventTime'|'DeliveryTime'
935
+ # }
936
+ # }
937
+ # }
938
+ # }
939
+ # Or empty dict {} if logging is not enabled
940
+ if bucket_logging is None:
941
+ return None
942
+
943
+ logging_config = bucket_logging.get("LoggingEnabled", {})
944
+ if not logging_config:
945
+ return {
946
+ "bucket": bucket,
947
+ "logging_enabled": False,
948
+ "target_bucket": None,
949
+ }
950
+
951
+ return {
952
+ "bucket": bucket,
953
+ "logging_enabled": True,
954
+ "target_bucket": logging_config.get("TargetBucket"),
955
+ }
956
+
957
+
854
958
  @timeit
855
959
  def parse_notification_configuration(
856
960
  bucket: str, notification_config: Optional[Dict]
@@ -1,14 +1,13 @@
1
1
  import asyncio
2
- import datetime
3
2
  import logging
4
- from traceback import TracebackException
5
- from typing import Awaitable
6
- from typing import Callable
7
3
 
8
4
  import neo4j
9
5
 
10
6
  from cartography.config import Config
11
- from cartography.intel.entra.resources import RESOURCE_FUNCTIONS
7
+ from cartography.intel.entra.applications import sync_entra_applications
8
+ from cartography.intel.entra.groups import sync_entra_groups
9
+ from cartography.intel.entra.ou import sync_entra_ous
10
+ from cartography.intel.entra.users import sync_entra_users
12
11
  from cartography.util import timeit
13
12
 
14
13
  logger = logging.getLogger(__name__)
@@ -40,46 +39,45 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
40
39
  }
41
40
 
42
41
  async def main() -> None:
43
- failed_stages = []
44
- exception_tracebacks = []
42
+ # Run user sync
43
+ await sync_entra_users(
44
+ neo4j_session,
45
+ config.entra_tenant_id,
46
+ config.entra_client_id,
47
+ config.entra_client_secret,
48
+ config.update_tag,
49
+ common_job_parameters,
50
+ )
45
51
 
46
- async def run_stage(name: str, func: Callable[..., Awaitable[None]]) -> None:
47
- try:
48
- await func(
49
- neo4j_session,
50
- config.entra_tenant_id,
51
- config.entra_client_id,
52
- config.entra_client_secret,
53
- config.update_tag,
54
- common_job_parameters,
55
- )
56
- except Exception as e:
57
- if config.entra_best_effort_mode:
58
- timestamp = datetime.datetime.now()
59
- failed_stages.append(name)
60
- exception_traceback = TracebackException.from_exception(e)
61
- traceback_string = "".join(exception_traceback.format())
62
- exception_tracebacks.append(
63
- f"{timestamp} - Exception for stage {name}\n{traceback_string}"
64
- )
65
- logger.warning(
66
- f"Caught exception syncing {name}. entra-best-effort-mode is on so we are continuing "
67
- "on to the next Entra sync. All exceptions will be aggregated and re-logged at the end of the sync.",
68
- exc_info=True,
69
- )
70
- else:
71
- logger.error("Error during Entra sync", exc_info=True)
72
- raise
52
+ # Run group sync
53
+ await sync_entra_groups(
54
+ neo4j_session,
55
+ config.entra_tenant_id,
56
+ config.entra_client_id,
57
+ config.entra_client_secret,
58
+ config.update_tag,
59
+ common_job_parameters,
60
+ )
73
61
 
74
- for name, func in RESOURCE_FUNCTIONS:
75
- await run_stage(name, func)
62
+ # Run OU sync
63
+ await sync_entra_ous(
64
+ neo4j_session,
65
+ config.entra_tenant_id,
66
+ config.entra_client_id,
67
+ config.entra_client_secret,
68
+ config.update_tag,
69
+ common_job_parameters,
70
+ )
76
71
 
77
- if failed_stages:
78
- logger.error(
79
- f"Entra sync failed for the following stages: {', '.join(failed_stages)}. "
80
- "See the logs for more details.",
81
- )
82
- raise Exception("\n".join(exception_tracebacks))
72
+ # Run application sync
73
+ await sync_entra_applications(
74
+ neo4j_session,
75
+ config.entra_tenant_id,
76
+ config.entra_client_id,
77
+ config.entra_client_secret,
78
+ config.update_tag,
79
+ common_job_parameters,
80
+ )
83
81
 
84
- # Execute all syncs in sequence
82
+ # Execute both syncs in sequence
85
83
  asyncio.run(main())
@@ -172,11 +172,12 @@ async def get_app_role_assignments(
172
172
  )
173
173
  continue
174
174
  except Exception as e:
175
+ # Only catch truly unexpected errors - these should be rare
175
176
  logger.error(
176
177
  f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
177
178
  exc_info=True,
178
179
  )
179
- raise
180
+ continue
180
181
 
181
182
  logger.info(f"Retrieved {len(assignments)} app role assignments total")
182
183
  return assignments
@@ -43,7 +43,7 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
43
43
  current_request = None
44
44
  except Exception as e:
45
45
  logger.error(f"Failed to retrieve administrative units: {str(e)}")
46
- raise
46
+ current_request = None
47
47
 
48
48
  return all_units
49
49
 
@@ -3,7 +3,6 @@ import json
3
3
  import logging
4
4
 
5
5
  import neo4j
6
- from requests import exceptions
7
6
 
8
7
  import cartography.intel.github.repos
9
8
  import cartography.intel.github.teams
@@ -34,27 +33,24 @@ def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
34
33
  }
35
34
  # run sync for the provided github tokens
36
35
  for auth_data in auth_tokens["organization"]:
37
- try:
38
- cartography.intel.github.users.sync(
39
- neo4j_session,
40
- common_job_parameters,
41
- auth_data["token"],
42
- auth_data["url"],
43
- auth_data["name"],
44
- )
45
- cartography.intel.github.repos.sync(
46
- neo4j_session,
47
- common_job_parameters,
48
- auth_data["token"],
49
- auth_data["url"],
50
- auth_data["name"],
51
- )
52
- cartography.intel.github.teams.sync_github_teams(
53
- neo4j_session,
54
- common_job_parameters,
55
- auth_data["token"],
56
- auth_data["url"],
57
- auth_data["name"],
58
- )
59
- except exceptions.RequestException as e:
60
- logger.error("Could not complete request to the GitHub API: %s", e)
36
+ cartography.intel.github.users.sync(
37
+ neo4j_session,
38
+ common_job_parameters,
39
+ auth_data["token"],
40
+ auth_data["url"],
41
+ auth_data["name"],
42
+ )
43
+ cartography.intel.github.repos.sync(
44
+ neo4j_session,
45
+ common_job_parameters,
46
+ auth_data["token"],
47
+ auth_data["url"],
48
+ auth_data["name"],
49
+ )
50
+ cartography.intel.github.teams.sync_github_teams(
51
+ neo4j_session,
52
+ common_job_parameters,
53
+ auth_data["token"],
54
+ auth_data["url"],
55
+ auth_data["name"],
56
+ )
@@ -405,9 +405,16 @@ def _create_default_branch_id(repo_url: str, default_branch_ref_id: str) -> str:
405
405
 
406
406
  def _create_git_url_from_ssh_url(ssh_url: str) -> str:
407
407
  """
408
- Return a git:// URL from the given ssh_url
408
+ Convert SSH URL to git:// URL.
409
+ Example:
410
+ git@github.com:cartography-cncf/cartography.git
411
+ -> git://github.com/cartography-cncf/cartography.git
409
412
  """
410
- return ssh_url.replace("/", ":").replace("git@", "git://")
413
+ # Remove the user part (e.g., "git@")
414
+ _, host_and_path = ssh_url.split("@", 1)
415
+ # Replace first ':' (separating host and repo) with '/'
416
+ host, path = host_and_path.split(":", 1)
417
+ return f"git://{host}/{path}"
411
418
 
412
419
 
413
420
  def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict]) -> None:
@@ -647,9 +654,6 @@ def _transform_dependency_graph(
647
654
  requirements = dep.get("requirements", "")
648
655
  package_manager = dep.get("packageManager", "").upper()
649
656
 
650
- # Extract version from requirements string if available
651
- pinned_version = _extract_version_from_requirements(requirements)
652
-
653
657
  # Create ecosystem-specific canonical name
654
658
  canonical_name = _canonicalize_dependency_name(
655
659
  package_name, package_manager
@@ -658,11 +662,12 @@ def _transform_dependency_graph(
658
662
  # Create ecosystem identifier
659
663
  ecosystem = package_manager.lower() if package_manager else "unknown"
660
664
 
661
- # Create simple dependency ID using canonical name and version
665
+ # Create simple dependency ID using canonical name and requirements
662
666
  # This allows the same dependency to be shared across multiple repos
667
+ requirements_for_id = (requirements or "").strip()
663
668
  dependency_id = (
664
- f"{canonical_name}|{pinned_version}"
665
- if pinned_version
669
+ f"{canonical_name}|{requirements_for_id}"
670
+ if requirements_for_id
666
671
  else canonical_name
667
672
  )
668
673
 
@@ -677,15 +682,12 @@ def _transform_dependency_graph(
677
682
  "id": dependency_id,
678
683
  "name": canonical_name,
679
684
  "original_name": package_name, # Keep original for reference
680
- "version": pinned_version,
681
685
  "requirements": normalized_requirements,
682
686
  "ecosystem": ecosystem,
683
687
  "package_manager": package_manager,
684
688
  "manifest_path": manifest_path,
685
689
  "manifest_id": manifest_id,
686
690
  "repo_url": repo_url,
687
- # Add separate fields for easier querying
688
- "repo_name": repo_url.split("/")[-1] if repo_url else "",
689
691
  "manifest_file": (
690
692
  manifest_path.split("/")[-1] if manifest_path else ""
691
693
  ),
@@ -698,33 +700,6 @@ def _transform_dependency_graph(
698
700
  logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
699
701
 
700
702
 
701
- def _extract_version_from_requirements(requirements: Optional[str]) -> Optional[str]:
702
- """
703
- Extract a pinned version from a requirements string if it exists.
704
- Examples: "1.2.3" -> "1.2.3", "^1.2.3" -> None, ">=1.0,<2.0" -> None
705
- """
706
- if not requirements or not requirements.strip():
707
- return None
708
-
709
- # Handle exact version specifications (no operators)
710
- if requirements and not any(
711
- op in requirements for op in ["^", "~", ">", "<", "=", "*"]
712
- ):
713
- stripped = requirements.strip()
714
- return stripped if stripped else None
715
-
716
- # Handle == specifications
717
- if "==" in requirements:
718
- parts = requirements.split("==")
719
- if len(parts) == 2:
720
- version = parts[1].strip()
721
- # Remove any trailing constraints
722
- version = version.split(",")[0].split(" ")[0]
723
- return version if version else None
724
-
725
- return None
726
-
727
-
728
703
  def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
729
704
  """
730
705
  Canonicalize dependency names based on ecosystem conventions.
@@ -6,6 +6,7 @@ from cartography.config import Config
6
6
  from cartography.intel.kubernetes.clusters import sync_kubernetes_cluster
7
7
  from cartography.intel.kubernetes.namespaces import sync_namespaces
8
8
  from cartography.intel.kubernetes.pods import sync_pods
9
+ from cartography.intel.kubernetes.rbac import sync_kubernetes_rbac
9
10
  from cartography.intel.kubernetes.secrets import sync_secrets
10
11
  from cartography.intel.kubernetes.services import sync_services
11
12
  from cartography.intel.kubernetes.util import get_k8s_clients
@@ -38,6 +39,9 @@ def start_k8s_ingestion(session: Session, config: Config) -> None:
38
39
  common_job_parameters["CLUSTER_ID"] = cluster_info.get("id")
39
40
 
40
41
  sync_namespaces(session, client, config.update_tag, common_job_parameters)
42
+ sync_kubernetes_rbac(
43
+ session, client, config.update_tag, common_job_parameters
44
+ )
41
45
  all_pods = sync_pods(
42
46
  session,
43
47
  client,