PyPI - cartography - Versions diffs - 0.110.0rc1__py3-none-any.whl → 0.110.0rc2__py3-none-any.whl - Mend

cartography 0.110.0rc1__py3-none-any.whl → 0.110.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cartography might be problematic. Click here for more details.

Files changed (43)

cartography/_version.py +2 -2
cartography/cli.py +0 -8
cartography/config.py +0 -9
cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
cartography/intel/aws/cognito.py +201 -0
cartography/intel/aws/ecs.py +7 -1
cartography/intel/aws/glue.py +64 -0
cartography/intel/aws/kms.py +13 -1
cartography/intel/aws/rds.py +105 -0
cartography/intel/aws/resources.py +2 -0
cartography/intel/aws/route53.py +3 -1
cartography/intel/aws/s3.py +104 -0
cartography/intel/entra/__init__.py +41 -43
cartography/intel/entra/applications.py +2 -1
cartography/intel/entra/ou.py +1 -1
cartography/intel/github/__init__.py +21 -25
cartography/intel/github/repos.py +4 -36
cartography/intel/kubernetes/__init__.py +4 -0
cartography/intel/kubernetes/rbac.py +464 -0
cartography/intel/kubernetes/util.py +17 -0
cartography/models/aws/cognito/__init__.py +0 -0
cartography/models/aws/cognito/identity_pool.py +70 -0
cartography/models/aws/cognito/user_pool.py +47 -0
cartography/models/aws/ec2/security_groups.py +1 -1
cartography/models/aws/ecs/services.py +17 -0
cartography/models/aws/ecs/tasks.py +1 -0
cartography/models/aws/glue/job.py +69 -0
cartography/models/aws/rds/event_subscription.py +146 -0
cartography/models/aws/route53/dnsrecord.py +21 -0
cartography/models/github/dependencies.py +1 -2
cartography/models/kubernetes/clusterrolebindings.py +98 -0
cartography/models/kubernetes/clusterroles.py +52 -0
cartography/models/kubernetes/rolebindings.py +119 -0
cartography/models/kubernetes/roles.py +76 -0
cartography/models/kubernetes/serviceaccounts.py +77 -0
{cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/METADATA +3 -3
{cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/RECORD +42 -31
cartography/intel/entra/resources.py +0 -20
cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
{cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/WHEEL +0 -0
{cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/entry_points.txt +0 -0
{cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/licenses/LICENSE +0 -0
{cartography-0.110.0rc1.dist-info → cartography-0.110.0rc2.dist-info}/top_level.txt +0 -0

cartography/intel/aws/s3.py CHANGED Viewed

@@ -71,6 +71,7 @@ def get_s3_bucket_details(
         Dict[str, Any],
         Dict[str, Any],
         Dict[str, Any],
+        Dict[str, Any],
     ]
     async def _get_bucket_detail(bucket: Dict[str, Any]) -> BucketDetail:
@@ -88,6 +89,7 @@ def get_s3_bucket_details(
             versioning,
             public_access_block,
             bucket_ownership_controls,
+            bucket_logging,
         ) = await asyncio.gather(
             to_asynchronous(get_acl, bucket, client),
             to_asynchronous(get_policy, bucket, client),
@@ -95,6 +97,7 @@ def get_s3_bucket_details(
             to_asynchronous(get_versioning, bucket, client),
             to_asynchronous(get_public_access_block, bucket, client),
             to_asynchronous(get_bucket_ownership_controls, bucket, client),
+            to_asynchronous(get_bucket_logging, bucket, client),
         )
         return (
             bucket["Name"],
@@ -104,6 +107,7 @@ def get_s3_bucket_details(
             versioning,
             public_access_block,
             bucket_ownership_controls,
+            bucket_logging,
         )
     bucket_details = to_synchronous(
@@ -241,6 +245,29 @@ def get_bucket_ownership_controls(
     return bucket_ownership_controls
+@timeit
+@aws_handle_regions
+def get_bucket_logging(
+    bucket: Dict, client: botocore.client.BaseClient
+) -> Optional[Dict]:
+    """
+    Gets the S3 bucket logging status configuration.
+    """
+    bucket_logging = None
+    try:
+        bucket_logging = client.get_bucket_logging(Bucket=bucket["Name"])
+    except ClientError as e:
+        if _is_common_exception(e, bucket):
+            pass
+        else:
+            raise
+    except EndpointConnectionError:
+        logger.warning(
+            f"Failed to retrieve S3 bucket logging status for {bucket['Name']} - Could not connect to the endpoint URL",
+        )
+    return bucket_logging
 @timeit
 def _is_common_exception(e: Exception, bucket: Dict) -> bool:
     error_msg = "Failed to retrieve S3 bucket detail"
@@ -319,6 +346,7 @@ def _load_s3_acls(
         "aws_s3acl_analysis.json",
         neo4j_session,
         {"AWS_ID": aws_account_id},
+        package="cartography.data.jobs.scoped_analysis",
     )
@@ -479,6 +507,30 @@ def _load_bucket_ownership_controls(
     )
+@timeit
+def _load_bucket_logging(
+    neo4j_session: neo4j.Session,
+    bucket_logging_configs: List[Dict],
+    update_tag: int,
+) -> None:
+    """
+    Ingest S3 bucket logging status configuration into neo4j.
+    """
+    # Load basic logging status
+    ingest_bucket_logging = """
+    UNWIND $bucket_logging_configs AS bucket_logging
+    MATCH (bucket:S3Bucket{name: bucket_logging.bucket})
+    SET bucket.logging_enabled = bucket_logging.logging_enabled,
+        bucket.logging_target_bucket = bucket_logging.target_bucket,
+        bucket.lastupdated = $update_tag
+    """
+    neo4j_session.run(
+        ingest_bucket_logging,
+        bucket_logging_configs=bucket_logging_configs,
+        update_tag=update_tag,
+    )
 def _set_default_values(neo4j_session: neo4j.Session, aws_account_id: str) -> None:
     set_defaults = """
     MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(s:S3Bucket) where s.anonymous_actions IS NULL
@@ -516,6 +568,7 @@ def load_s3_details(
     versioning_configs: List[Dict] = []
     public_access_block_configs: List[Dict] = []
     bucket_ownership_controls_configs: List[Dict] = []
+    bucket_logging_configs: List[Dict] = []
     for (
         bucket,
         acl,
@@ -524,6 +577,7 @@ def load_s3_details(
         versioning,
         public_access_block,
         bucket_ownership_controls,
+        bucket_logging,
     ) in s3_details_iter:
         parsed_acls = parse_acl(acl, bucket, aws_account_id)
         if parsed_acls is not None:
@@ -551,6 +605,9 @@ def load_s3_details(
         )
         if parsed_bucket_ownership_controls is not None:
             bucket_ownership_controls_configs.append(parsed_bucket_ownership_controls)
+        parsed_bucket_logging = parse_bucket_logging(bucket, bucket_logging)
+        if parsed_bucket_logging is not None:
+            bucket_logging_configs.append(parsed_bucket_logging)
     # cleanup existing policy properties set on S3 Buckets
     run_cleanup_job(
@@ -569,6 +626,7 @@ def load_s3_details(
     _load_bucket_ownership_controls(
         neo4j_session, bucket_ownership_controls_configs, update_tag
     )
+    _load_bucket_logging(neo4j_session, bucket_logging_configs, update_tag)
     _set_default_values(neo4j_session, aws_account_id)
@@ -851,6 +909,52 @@ def parse_bucket_ownership_controls(
     }
+def parse_bucket_logging(bucket: str, bucket_logging: Optional[Dict]) -> Optional[Dict]:
+    """Parses the S3 bucket logging status configuration and returns a dict of the relevant data"""
+    # Logging status object JSON looks like:
+    # {
+    #     'LoggingEnabled': {
+    #         'TargetBucket': 'string',
+    #         'TargetGrants': [
+    #             {
+    #                 'Grantee': {
+    #                     'DisplayName': 'string',
+    #                     'EmailAddress': 'string',
+    #                     'ID': 'string',
+    #                     'Type': 'CanonicalUser'|'AmazonCustomerByEmail'|'Group',
+    #                     'URI': 'string'
+    #                 },
+    #                 'Permission': 'FULL_CONTROL'|'READ'|'WRITE'
+    #             },
+    #         ],
+    #         'TargetPrefix': 'string',
+    #         'TargetObjectKeyFormat': {
+    #             'SimplePrefix': {},
+    #             'PartitionedPrefix': {
+    #                 'PartitionDateSource': 'EventTime'|'DeliveryTime'
+    #             }
+    #         }
+    #     }
+    # }
+    # Or empty dict {} if logging is not enabled
+    if bucket_logging is None:
+        return None
+    logging_config = bucket_logging.get("LoggingEnabled", {})
+    if not logging_config:
+        return {
+            "bucket": bucket,
+            "logging_enabled": False,
+            "target_bucket": None,
+        }
+    return {
+        "bucket": bucket,
+        "logging_enabled": True,
+        "target_bucket": logging_config.get("TargetBucket"),
+    }
 @timeit
 def parse_notification_configuration(
     bucket: str, notification_config: Optional[Dict]

cartography/intel/entra/__init__.py CHANGED Viewed

@@ -1,14 +1,13 @@
 import asyncio
-import datetime
 import logging
-from traceback import TracebackException
-from typing import Awaitable
-from typing import Callable
 import neo4j
 from cartography.config import Config
-from cartography.intel.entra.resources import RESOURCE_FUNCTIONS
+from cartography.intel.entra.applications import sync_entra_applications
+from cartography.intel.entra.groups import sync_entra_groups
+from cartography.intel.entra.ou import sync_entra_ous
+from cartography.intel.entra.users import sync_entra_users
 from cartography.util import timeit
 logger = logging.getLogger(__name__)
@@ -40,46 +39,45 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
     }
     async def main() -> None:
-        failed_stages = []
-        exception_tracebacks = []
+        # Run user sync
+        await sync_entra_users(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )
-        async def run_stage(name: str, func: Callable[..., Awaitable[None]]) -> None:
-            try:
-                await func(
-                    neo4j_session,
-                    config.entra_tenant_id,
-                    config.entra_client_id,
-                    config.entra_client_secret,
-                    config.update_tag,
-                    common_job_parameters,
-                )
-            except Exception as e:
-                if config.entra_best_effort_mode:
-                    timestamp = datetime.datetime.now()
-                    failed_stages.append(name)
-                    exception_traceback = TracebackException.from_exception(e)
-                    traceback_string = "".join(exception_traceback.format())
-                    exception_tracebacks.append(
-                        f"{timestamp} - Exception for stage {name}\n{traceback_string}"
-                    )
-                    logger.warning(
-                        f"Caught exception syncing {name}. entra-best-effort-mode is on so we are continuing "
-                        "on to the next Entra sync. All exceptions will be aggregated and re-logged at the end of the sync.",
-                        exc_info=True,
-                    )
-                else:
-                    logger.error("Error during Entra sync", exc_info=True)
-                    raise
+        # Run group sync
+        await sync_entra_groups(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )
-        for name, func in RESOURCE_FUNCTIONS:
-            await run_stage(name, func)
+        # Run OU sync
+        await sync_entra_ous(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )
-        if failed_stages:
-            logger.error(
-                f"Entra sync failed for the following stages: {', '.join(failed_stages)}. "
-                "See the logs for more details.",
-            )
-            raise Exception("\n".join(exception_tracebacks))
+        # Run application sync
+        await sync_entra_applications(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )
-    # Execute all syncs in sequence
+    # Execute both syncs in sequence
     asyncio.run(main())

cartography/intel/entra/applications.py CHANGED Viewed

@@ -172,11 +172,12 @@ async def get_app_role_assignments(
             )
             continue
         except Exception as e:
+            # Only catch truly unexpected errors - these should be rare
             logger.error(
                 f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
                 exc_info=True,
             )
-            raise
+            continue
     logger.info(f"Retrieved {len(assignments)} app role assignments total")
     return assignments

cartography/intel/entra/ou.py CHANGED Viewed

@@ -43,7 +43,7 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
                 current_request = None
         except Exception as e:
             logger.error(f"Failed to retrieve administrative units: {str(e)}")
-            raise
+            current_request = None
     return all_units

cartography/intel/github/__init__.py CHANGED Viewed

@@ -3,7 +3,6 @@ import json
 import logging
 import neo4j
-from requests import exceptions
 import cartography.intel.github.repos
 import cartography.intel.github.teams
@@ -34,27 +33,24 @@ def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
     }
     # run sync for the provided github tokens
     for auth_data in auth_tokens["organization"]:
-        try:
-            cartography.intel.github.users.sync(
-                neo4j_session,
-                common_job_parameters,
-                auth_data["token"],
-                auth_data["url"],
-                auth_data["name"],
-            )
-            cartography.intel.github.repos.sync(
-                neo4j_session,
-                common_job_parameters,
-                auth_data["token"],
-                auth_data["url"],
-                auth_data["name"],
-            )
-            cartography.intel.github.teams.sync_github_teams(
-                neo4j_session,
-                common_job_parameters,
-                auth_data["token"],
-                auth_data["url"],
-                auth_data["name"],
-            )
-        except exceptions.RequestException as e:
-            logger.error("Could not complete request to the GitHub API: %s", e)
+        cartography.intel.github.users.sync(
+            neo4j_session,
+            common_job_parameters,
+            auth_data["token"],
+            auth_data["url"],
+            auth_data["name"],
+        )
+        cartography.intel.github.repos.sync(
+            neo4j_session,
+            common_job_parameters,
+            auth_data["token"],
+            auth_data["url"],
+            auth_data["name"],
+        )
+        cartography.intel.github.teams.sync_github_teams(
+            neo4j_session,
+            common_job_parameters,
+            auth_data["token"],
+            auth_data["url"],
+            auth_data["name"],
+        )

cartography/intel/github/repos.py CHANGED Viewed

@@ -647,9 +647,6 @@ def _transform_dependency_graph(
             requirements = dep.get("requirements", "")
             package_manager = dep.get("packageManager", "").upper()
-            # Extract version from requirements string if available
-            pinned_version = _extract_version_from_requirements(requirements)
             # Create ecosystem-specific canonical name
             canonical_name = _canonicalize_dependency_name(
                 package_name, package_manager
@@ -658,11 +655,12 @@ def _transform_dependency_graph(
             # Create ecosystem identifier
             ecosystem = package_manager.lower() if package_manager else "unknown"
-            # Create simple dependency ID using canonical name and version
+            # Create simple dependency ID using canonical name and requirements
             # This allows the same dependency to be shared across multiple repos
+            requirements_for_id = (requirements or "").strip()
             dependency_id = (
-                f"{canonical_name}|{pinned_version}"
-                if pinned_version
+                f"{canonical_name}|{requirements_for_id}"
+                if requirements_for_id
                 else canonical_name
             )
@@ -677,15 +675,12 @@ def _transform_dependency_graph(
                     "id": dependency_id,
                     "name": canonical_name,
                     "original_name": package_name,  # Keep original for reference
-                    "version": pinned_version,
                     "requirements": normalized_requirements,
                     "ecosystem": ecosystem,
                     "package_manager": package_manager,
                     "manifest_path": manifest_path,
                     "manifest_id": manifest_id,
                     "repo_url": repo_url,
-                    # Add separate fields for easier querying
-                    "repo_name": repo_url.split("/")[-1] if repo_url else "",
                     "manifest_file": (
                         manifest_path.split("/")[-1] if manifest_path else ""
                     ),
@@ -698,33 +693,6 @@ def _transform_dependency_graph(
         logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
-def _extract_version_from_requirements(requirements: Optional[str]) -> Optional[str]:
-    """
-    Extract a pinned version from a requirements string if it exists.
-    Examples: "1.2.3" -> "1.2.3", "^1.2.3" -> None, ">=1.0,<2.0" -> None
-    """
-    if not requirements or not requirements.strip():
-        return None
-    # Handle exact version specifications (no operators)
-    if requirements and not any(
-        op in requirements for op in ["^", "~", ">", "<", "=", "*"]
-    ):
-        stripped = requirements.strip()
-        return stripped if stripped else None
-    # Handle == specifications
-    if "==" in requirements:
-        parts = requirements.split("==")
-        if len(parts) == 2:
-            version = parts[1].strip()
-            # Remove any trailing constraints
-            version = version.split(",")[0].split(" ")[0]
-            return version if version else None
-    return None
 def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
     """
     Canonicalize dependency names based on ecosystem conventions.

cartography/intel/kubernetes/__init__.py CHANGED Viewed

@@ -6,6 +6,7 @@ from cartography.config import Config
 from cartography.intel.kubernetes.clusters import sync_kubernetes_cluster
 from cartography.intel.kubernetes.namespaces import sync_namespaces
 from cartography.intel.kubernetes.pods import sync_pods
+from cartography.intel.kubernetes.rbac import sync_kubernetes_rbac
 from cartography.intel.kubernetes.secrets import sync_secrets
 from cartography.intel.kubernetes.services import sync_services
 from cartography.intel.kubernetes.util import get_k8s_clients
@@ -38,6 +39,9 @@ def start_k8s_ingestion(session: Session, config: Config) -> None:
             common_job_parameters["CLUSTER_ID"] = cluster_info.get("id")
             sync_namespaces(session, client, config.update_tag, common_job_parameters)
+            sync_kubernetes_rbac(
+                session, client, config.update_tag, common_job_parameters
+            )
             all_pods = sync_pods(
                 session,
                 client,

cartography 0.110.0rc1__py3-none-any.whl → 0.110.0rc2__py3-none-any.whl

Potentially problematic release.

cartography 0.110.0rc1__py3-none-any.whl → 0.110.0rc2__py3-none-any.whl