cartography 0.110.0rc1__py3-none-any.whl → 0.110.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. (The original page links to more details here; the link was not preserved in this text export.)

Files changed (43)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +0 -8
  3. cartography/config.py +0 -9
  4. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  5. cartography/intel/aws/cognito.py +201 -0
  6. cartography/intel/aws/ecs.py +7 -1
  7. cartography/intel/aws/glue.py +64 -0
  8. cartography/intel/aws/kms.py +13 -1
  9. cartography/intel/aws/rds.py +105 -0
  10. cartography/intel/aws/resources.py +2 -0
  11. cartography/intel/aws/route53.py +3 -1
  12. cartography/intel/aws/s3.py +104 -0
  13. cartography/intel/entra/__init__.py +41 -43
  14. cartography/intel/entra/applications.py +2 -1
  15. cartography/intel/entra/ou.py +1 -1
  16. cartography/intel/github/__init__.py +21 -25
  17. cartography/intel/github/repos.py +4 -36
  18. cartography/intel/kubernetes/__init__.py +4 -0
  19. cartography/intel/kubernetes/rbac.py +464 -0
  20. cartography/intel/kubernetes/util.py +17 -0
  21. cartography/models/aws/cognito/__init__.py +0 -0
  22. cartography/models/aws/cognito/identity_pool.py +70 -0
  23. cartography/models/aws/cognito/user_pool.py +47 -0
  24. cartography/models/aws/ec2/security_groups.py +1 -1
  25. cartography/models/aws/ecs/services.py +17 -0
  26. cartography/models/aws/ecs/tasks.py +1 -0
  27. cartography/models/aws/glue/job.py +69 -0
  28. cartography/models/aws/rds/event_subscription.py +146 -0
  29. cartography/models/aws/route53/dnsrecord.py +21 -0
  30. cartography/models/github/dependencies.py +1 -2
  31. cartography/models/kubernetes/clusterrolebindings.py +98 -0
  32. cartography/models/kubernetes/clusterroles.py +52 -0
  33. cartography/models/kubernetes/rolebindings.py +119 -0
  34. cartography/models/kubernetes/roles.py +76 -0
  35. cartography/models/kubernetes/serviceaccounts.py +77 -0
  36. {cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/METADATA +3 -3
  37. {cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/RECORD +42 -31
  38. cartography/intel/entra/resources.py +0 -20
  39. cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  40. {cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/WHEEL +0 -0
  41. {cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/entry_points.txt +0 -0
  42. {cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/licenses/LICENSE +0 -0
  43. {cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/top_level.txt +0 -0
cartography/intel/aws/s3.py +104 -0
@@ -71,6 +71,7 @@ def get_s3_bucket_details(
71
71
  Dict[str, Any],
72
72
  Dict[str, Any],
73
73
  Dict[str, Any],
74
+ Dict[str, Any],
74
75
  ]
75
76
 
76
77
  async def _get_bucket_detail(bucket: Dict[str, Any]) -> BucketDetail:
@@ -88,6 +89,7 @@ def get_s3_bucket_details(
88
89
  versioning,
89
90
  public_access_block,
90
91
  bucket_ownership_controls,
92
+ bucket_logging,
91
93
  ) = await asyncio.gather(
92
94
  to_asynchronous(get_acl, bucket, client),
93
95
  to_asynchronous(get_policy, bucket, client),
@@ -95,6 +97,7 @@ def get_s3_bucket_details(
95
97
  to_asynchronous(get_versioning, bucket, client),
96
98
  to_asynchronous(get_public_access_block, bucket, client),
97
99
  to_asynchronous(get_bucket_ownership_controls, bucket, client),
100
+ to_asynchronous(get_bucket_logging, bucket, client),
98
101
  )
99
102
  return (
100
103
  bucket["Name"],
@@ -104,6 +107,7 @@ def get_s3_bucket_details(
104
107
  versioning,
105
108
  public_access_block,
106
109
  bucket_ownership_controls,
110
+ bucket_logging,
107
111
  )
108
112
 
109
113
  bucket_details = to_synchronous(
@@ -241,6 +245,29 @@ def get_bucket_ownership_controls(
241
245
  return bucket_ownership_controls
242
246
 
243
247
 
248
+ @timeit
249
+ @aws_handle_regions
250
+ def get_bucket_logging(
251
+ bucket: Dict, client: botocore.client.BaseClient
252
+ ) -> Optional[Dict]:
253
+ """
254
+ Gets the S3 bucket logging status configuration.
255
+ """
256
+ bucket_logging = None
257
+ try:
258
+ bucket_logging = client.get_bucket_logging(Bucket=bucket["Name"])
259
+ except ClientError as e:
260
+ if _is_common_exception(e, bucket):
261
+ pass
262
+ else:
263
+ raise
264
+ except EndpointConnectionError:
265
+ logger.warning(
266
+ f"Failed to retrieve S3 bucket logging status for {bucket['Name']} - Could not connect to the endpoint URL",
267
+ )
268
+ return bucket_logging
269
+
270
+
244
271
  @timeit
245
272
  def _is_common_exception(e: Exception, bucket: Dict) -> bool:
246
273
  error_msg = "Failed to retrieve S3 bucket detail"
@@ -319,6 +346,7 @@ def _load_s3_acls(
319
346
  "aws_s3acl_analysis.json",
320
347
  neo4j_session,
321
348
  {"AWS_ID": aws_account_id},
349
+ package="cartography.data.jobs.scoped_analysis",
322
350
  )
323
351
 
324
352
 
@@ -479,6 +507,30 @@ def _load_bucket_ownership_controls(
479
507
  )
480
508
 
481
509
 
510
+ @timeit
511
+ def _load_bucket_logging(
512
+ neo4j_session: neo4j.Session,
513
+ bucket_logging_configs: List[Dict],
514
+ update_tag: int,
515
+ ) -> None:
516
+ """
517
+ Ingest S3 bucket logging status configuration into neo4j.
518
+ """
519
+ # Load basic logging status
520
+ ingest_bucket_logging = """
521
+ UNWIND $bucket_logging_configs AS bucket_logging
522
+ MATCH (bucket:S3Bucket{name: bucket_logging.bucket})
523
+ SET bucket.logging_enabled = bucket_logging.logging_enabled,
524
+ bucket.logging_target_bucket = bucket_logging.target_bucket,
525
+ bucket.lastupdated = $update_tag
526
+ """
527
+ neo4j_session.run(
528
+ ingest_bucket_logging,
529
+ bucket_logging_configs=bucket_logging_configs,
530
+ update_tag=update_tag,
531
+ )
532
+
533
+
482
534
  def _set_default_values(neo4j_session: neo4j.Session, aws_account_id: str) -> None:
483
535
  set_defaults = """
484
536
  MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(s:S3Bucket) where s.anonymous_actions IS NULL
@@ -516,6 +568,7 @@ def load_s3_details(
516
568
  versioning_configs: List[Dict] = []
517
569
  public_access_block_configs: List[Dict] = []
518
570
  bucket_ownership_controls_configs: List[Dict] = []
571
+ bucket_logging_configs: List[Dict] = []
519
572
  for (
520
573
  bucket,
521
574
  acl,
@@ -524,6 +577,7 @@ def load_s3_details(
524
577
  versioning,
525
578
  public_access_block,
526
579
  bucket_ownership_controls,
580
+ bucket_logging,
527
581
  ) in s3_details_iter:
528
582
  parsed_acls = parse_acl(acl, bucket, aws_account_id)
529
583
  if parsed_acls is not None:
@@ -551,6 +605,9 @@ def load_s3_details(
551
605
  )
552
606
  if parsed_bucket_ownership_controls is not None:
553
607
  bucket_ownership_controls_configs.append(parsed_bucket_ownership_controls)
608
+ parsed_bucket_logging = parse_bucket_logging(bucket, bucket_logging)
609
+ if parsed_bucket_logging is not None:
610
+ bucket_logging_configs.append(parsed_bucket_logging)
554
611
 
555
612
  # cleanup existing policy properties set on S3 Buckets
556
613
  run_cleanup_job(
@@ -569,6 +626,7 @@ def load_s3_details(
569
626
  _load_bucket_ownership_controls(
570
627
  neo4j_session, bucket_ownership_controls_configs, update_tag
571
628
  )
629
+ _load_bucket_logging(neo4j_session, bucket_logging_configs, update_tag)
572
630
  _set_default_values(neo4j_session, aws_account_id)
573
631
 
574
632
 
@@ -851,6 +909,52 @@ def parse_bucket_ownership_controls(
851
909
  }
852
910
 
853
911
 
912
+ def parse_bucket_logging(bucket: str, bucket_logging: Optional[Dict]) -> Optional[Dict]:
913
+ """Parses the S3 bucket logging status configuration and returns a dict of the relevant data"""
914
+ # Logging status object JSON looks like:
915
+ # {
916
+ # 'LoggingEnabled': {
917
+ # 'TargetBucket': 'string',
918
+ # 'TargetGrants': [
919
+ # {
920
+ # 'Grantee': {
921
+ # 'DisplayName': 'string',
922
+ # 'EmailAddress': 'string',
923
+ # 'ID': 'string',
924
+ # 'Type': 'CanonicalUser'|'AmazonCustomerByEmail'|'Group',
925
+ # 'URI': 'string'
926
+ # },
927
+ # 'Permission': 'FULL_CONTROL'|'READ'|'WRITE'
928
+ # },
929
+ # ],
930
+ # 'TargetPrefix': 'string',
931
+ # 'TargetObjectKeyFormat': {
932
+ # 'SimplePrefix': {},
933
+ # 'PartitionedPrefix': {
934
+ # 'PartitionDateSource': 'EventTime'|'DeliveryTime'
935
+ # }
936
+ # }
937
+ # }
938
+ # }
939
+ # Or empty dict {} if logging is not enabled
940
+ if bucket_logging is None:
941
+ return None
942
+
943
+ logging_config = bucket_logging.get("LoggingEnabled", {})
944
+ if not logging_config:
945
+ return {
946
+ "bucket": bucket,
947
+ "logging_enabled": False,
948
+ "target_bucket": None,
949
+ }
950
+
951
+ return {
952
+ "bucket": bucket,
953
+ "logging_enabled": True,
954
+ "target_bucket": logging_config.get("TargetBucket"),
955
+ }
956
+
957
+
854
958
  @timeit
855
959
  def parse_notification_configuration(
856
960
  bucket: str, notification_config: Optional[Dict]
cartography/intel/entra/__init__.py +41 -43
@@ -1,14 +1,13 @@
1
1
  import asyncio
2
- import datetime
3
2
  import logging
4
- from traceback import TracebackException
5
- from typing import Awaitable
6
- from typing import Callable
7
3
 
8
4
  import neo4j
9
5
 
10
6
  from cartography.config import Config
11
- from cartography.intel.entra.resources import RESOURCE_FUNCTIONS
7
+ from cartography.intel.entra.applications import sync_entra_applications
8
+ from cartography.intel.entra.groups import sync_entra_groups
9
+ from cartography.intel.entra.ou import sync_entra_ous
10
+ from cartography.intel.entra.users import sync_entra_users
12
11
  from cartography.util import timeit
13
12
 
14
13
  logger = logging.getLogger(__name__)
@@ -40,46 +39,45 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
40
39
  }
41
40
 
42
41
  async def main() -> None:
43
- failed_stages = []
44
- exception_tracebacks = []
42
+ # Run user sync
43
+ await sync_entra_users(
44
+ neo4j_session,
45
+ config.entra_tenant_id,
46
+ config.entra_client_id,
47
+ config.entra_client_secret,
48
+ config.update_tag,
49
+ common_job_parameters,
50
+ )
45
51
 
46
- async def run_stage(name: str, func: Callable[..., Awaitable[None]]) -> None:
47
- try:
48
- await func(
49
- neo4j_session,
50
- config.entra_tenant_id,
51
- config.entra_client_id,
52
- config.entra_client_secret,
53
- config.update_tag,
54
- common_job_parameters,
55
- )
56
- except Exception as e:
57
- if config.entra_best_effort_mode:
58
- timestamp = datetime.datetime.now()
59
- failed_stages.append(name)
60
- exception_traceback = TracebackException.from_exception(e)
61
- traceback_string = "".join(exception_traceback.format())
62
- exception_tracebacks.append(
63
- f"{timestamp} - Exception for stage {name}\n{traceback_string}"
64
- )
65
- logger.warning(
66
- f"Caught exception syncing {name}. entra-best-effort-mode is on so we are continuing "
67
- "on to the next Entra sync. All exceptions will be aggregated and re-logged at the end of the sync.",
68
- exc_info=True,
69
- )
70
- else:
71
- logger.error("Error during Entra sync", exc_info=True)
72
- raise
52
+ # Run group sync
53
+ await sync_entra_groups(
54
+ neo4j_session,
55
+ config.entra_tenant_id,
56
+ config.entra_client_id,
57
+ config.entra_client_secret,
58
+ config.update_tag,
59
+ common_job_parameters,
60
+ )
73
61
 
74
- for name, func in RESOURCE_FUNCTIONS:
75
- await run_stage(name, func)
62
+ # Run OU sync
63
+ await sync_entra_ous(
64
+ neo4j_session,
65
+ config.entra_tenant_id,
66
+ config.entra_client_id,
67
+ config.entra_client_secret,
68
+ config.update_tag,
69
+ common_job_parameters,
70
+ )
76
71
 
77
- if failed_stages:
78
- logger.error(
79
- f"Entra sync failed for the following stages: {', '.join(failed_stages)}. "
80
- "See the logs for more details.",
81
- )
82
- raise Exception("\n".join(exception_tracebacks))
72
+ # Run application sync
73
+ await sync_entra_applications(
74
+ neo4j_session,
75
+ config.entra_tenant_id,
76
+ config.entra_client_id,
77
+ config.entra_client_secret,
78
+ config.update_tag,
79
+ common_job_parameters,
80
+ )
83
81
 
84
- # Execute all syncs in sequence
82
+ # Execute both syncs in sequence
85
83
  asyncio.run(main())
cartography/intel/entra/applications.py +2 -1
@@ -172,11 +172,12 @@ async def get_app_role_assignments(
172
172
  )
173
173
  continue
174
174
  except Exception as e:
175
+ # Only catch truly unexpected errors - these should be rare
175
176
  logger.error(
176
177
  f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
177
178
  exc_info=True,
178
179
  )
179
- raise
180
+ continue
180
181
 
181
182
  logger.info(f"Retrieved {len(assignments)} app role assignments total")
182
183
  return assignments
cartography/intel/entra/ou.py +1 -1
@@ -43,7 +43,7 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
43
43
  current_request = None
44
44
  except Exception as e:
45
45
  logger.error(f"Failed to retrieve administrative units: {str(e)}")
46
- raise
46
+ current_request = None
47
47
 
48
48
  return all_units
49
49
 
cartography/intel/github/__init__.py +21 -25
@@ -3,7 +3,6 @@ import json
3
3
  import logging
4
4
 
5
5
  import neo4j
6
- from requests import exceptions
7
6
 
8
7
  import cartography.intel.github.repos
9
8
  import cartography.intel.github.teams
@@ -34,27 +33,24 @@ def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
34
33
  }
35
34
  # run sync for the provided github tokens
36
35
  for auth_data in auth_tokens["organization"]:
37
- try:
38
- cartography.intel.github.users.sync(
39
- neo4j_session,
40
- common_job_parameters,
41
- auth_data["token"],
42
- auth_data["url"],
43
- auth_data["name"],
44
- )
45
- cartography.intel.github.repos.sync(
46
- neo4j_session,
47
- common_job_parameters,
48
- auth_data["token"],
49
- auth_data["url"],
50
- auth_data["name"],
51
- )
52
- cartography.intel.github.teams.sync_github_teams(
53
- neo4j_session,
54
- common_job_parameters,
55
- auth_data["token"],
56
- auth_data["url"],
57
- auth_data["name"],
58
- )
59
- except exceptions.RequestException as e:
60
- logger.error("Could not complete request to the GitHub API: %s", e)
36
+ cartography.intel.github.users.sync(
37
+ neo4j_session,
38
+ common_job_parameters,
39
+ auth_data["token"],
40
+ auth_data["url"],
41
+ auth_data["name"],
42
+ )
43
+ cartography.intel.github.repos.sync(
44
+ neo4j_session,
45
+ common_job_parameters,
46
+ auth_data["token"],
47
+ auth_data["url"],
48
+ auth_data["name"],
49
+ )
50
+ cartography.intel.github.teams.sync_github_teams(
51
+ neo4j_session,
52
+ common_job_parameters,
53
+ auth_data["token"],
54
+ auth_data["url"],
55
+ auth_data["name"],
56
+ )
cartography/intel/github/repos.py +4 -36
@@ -647,9 +647,6 @@ def _transform_dependency_graph(
647
647
  requirements = dep.get("requirements", "")
648
648
  package_manager = dep.get("packageManager", "").upper()
649
649
 
650
- # Extract version from requirements string if available
651
- pinned_version = _extract_version_from_requirements(requirements)
652
-
653
650
  # Create ecosystem-specific canonical name
654
651
  canonical_name = _canonicalize_dependency_name(
655
652
  package_name, package_manager
@@ -658,11 +655,12 @@ def _transform_dependency_graph(
658
655
  # Create ecosystem identifier
659
656
  ecosystem = package_manager.lower() if package_manager else "unknown"
660
657
 
661
- # Create simple dependency ID using canonical name and version
658
+ # Create simple dependency ID using canonical name and requirements
662
659
  # This allows the same dependency to be shared across multiple repos
660
+ requirements_for_id = (requirements or "").strip()
663
661
  dependency_id = (
664
- f"{canonical_name}|{pinned_version}"
665
- if pinned_version
662
+ f"{canonical_name}|{requirements_for_id}"
663
+ if requirements_for_id
666
664
  else canonical_name
667
665
  )
668
666
 
@@ -677,15 +675,12 @@ def _transform_dependency_graph(
677
675
  "id": dependency_id,
678
676
  "name": canonical_name,
679
677
  "original_name": package_name, # Keep original for reference
680
- "version": pinned_version,
681
678
  "requirements": normalized_requirements,
682
679
  "ecosystem": ecosystem,
683
680
  "package_manager": package_manager,
684
681
  "manifest_path": manifest_path,
685
682
  "manifest_id": manifest_id,
686
683
  "repo_url": repo_url,
687
- # Add separate fields for easier querying
688
- "repo_name": repo_url.split("/")[-1] if repo_url else "",
689
684
  "manifest_file": (
690
685
  manifest_path.split("/")[-1] if manifest_path else ""
691
686
  ),
@@ -698,33 +693,6 @@ def _transform_dependency_graph(
698
693
  logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
699
694
 
700
695
 
701
- def _extract_version_from_requirements(requirements: Optional[str]) -> Optional[str]:
702
- """
703
- Extract a pinned version from a requirements string if it exists.
704
- Examples: "1.2.3" -> "1.2.3", "^1.2.3" -> None, ">=1.0,<2.0" -> None
705
- """
706
- if not requirements or not requirements.strip():
707
- return None
708
-
709
- # Handle exact version specifications (no operators)
710
- if requirements and not any(
711
- op in requirements for op in ["^", "~", ">", "<", "=", "*"]
712
- ):
713
- stripped = requirements.strip()
714
- return stripped if stripped else None
715
-
716
- # Handle == specifications
717
- if "==" in requirements:
718
- parts = requirements.split("==")
719
- if len(parts) == 2:
720
- version = parts[1].strip()
721
- # Remove any trailing constraints
722
- version = version.split(",")[0].split(" ")[0]
723
- return version if version else None
724
-
725
- return None
726
-
727
-
728
696
  def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
729
697
  """
730
698
  Canonicalize dependency names based on ecosystem conventions.
cartography/intel/kubernetes/__init__.py +4 -0
@@ -6,6 +6,7 @@ from cartography.config import Config
6
6
  from cartography.intel.kubernetes.clusters import sync_kubernetes_cluster
7
7
  from cartography.intel.kubernetes.namespaces import sync_namespaces
8
8
  from cartography.intel.kubernetes.pods import sync_pods
9
+ from cartography.intel.kubernetes.rbac import sync_kubernetes_rbac
9
10
  from cartography.intel.kubernetes.secrets import sync_secrets
10
11
  from cartography.intel.kubernetes.services import sync_services
11
12
  from cartography.intel.kubernetes.util import get_k8s_clients
@@ -38,6 +39,9 @@ def start_k8s_ingestion(session: Session, config: Config) -> None:
38
39
  common_job_parameters["CLUSTER_ID"] = cluster_info.get("id")
39
40
 
40
41
  sync_namespaces(session, client, config.update_tag, common_job_parameters)
42
+ sync_kubernetes_rbac(
43
+ session, client, config.update_tag, common_job_parameters
44
+ )
41
45
  all_pods = sync_pods(
42
46
  session,
43
47
  client,