cartography 0.104.0rc3__py3-none-any.whl → 0.106.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cartography has been flagged as possibly problematic; see the registry's advisory page for details.
- cartography/_version.py +2 -2
- cartography/cli.py +26 -1
- cartography/client/aws/__init__.py +19 -0
- cartography/client/aws/ecr.py +51 -0
- cartography/config.py +8 -0
- cartography/data/indexes.cypher +0 -37
- cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
- cartography/graph/cleanupbuilder.py +151 -41
- cartography/intel/aws/acm.py +124 -0
- cartography/intel/aws/cloudtrail.py +3 -38
- cartography/intel/aws/ecr.py +8 -2
- cartography/intel/aws/ecs.py +228 -380
- cartography/intel/aws/efs.py +99 -11
- cartography/intel/aws/iam.py +1 -1
- cartography/intel/aws/identitycenter.py +14 -3
- cartography/intel/aws/inspector.py +106 -53
- cartography/intel/aws/lambda_function.py +1 -1
- cartography/intel/aws/rds.py +2 -1
- cartography/intel/aws/resources.py +2 -0
- cartography/intel/aws/s3.py +195 -4
- cartography/intel/aws/sqs.py +36 -90
- cartography/intel/entra/__init__.py +22 -0
- cartography/intel/entra/applications.py +366 -0
- cartography/intel/entra/groups.py +151 -0
- cartography/intel/entra/ou.py +21 -5
- cartography/intel/kubernetes/__init__.py +30 -14
- cartography/intel/kubernetes/clusters.py +86 -0
- cartography/intel/kubernetes/namespaces.py +59 -57
- cartography/intel/kubernetes/pods.py +140 -77
- cartography/intel/kubernetes/secrets.py +95 -45
- cartography/intel/kubernetes/services.py +131 -67
- cartography/intel/kubernetes/util.py +125 -14
- cartography/intel/trivy/__init__.py +161 -0
- cartography/intel/trivy/scanner.py +363 -0
- cartography/models/aws/acm/__init__.py +0 -0
- cartography/models/aws/acm/certificate.py +75 -0
- cartography/models/aws/cloudtrail/trail.py +24 -0
- cartography/models/aws/ecs/__init__.py +0 -0
- cartography/models/aws/ecs/clusters.py +64 -0
- cartography/models/aws/ecs/container_definitions.py +93 -0
- cartography/models/aws/ecs/container_instances.py +84 -0
- cartography/models/aws/ecs/containers.py +80 -0
- cartography/models/aws/ecs/services.py +117 -0
- cartography/models/aws/ecs/task_definitions.py +97 -0
- cartography/models/aws/ecs/tasks.py +110 -0
- cartography/models/aws/efs/file_system.py +60 -0
- cartography/models/aws/efs/mount_target.py +29 -2
- cartography/models/aws/s3/notification.py +24 -0
- cartography/models/aws/secretsmanager/secret_version.py +0 -2
- cartography/models/aws/sqs/__init__.py +0 -0
- cartography/models/aws/sqs/queue.py +89 -0
- cartography/models/core/nodes.py +15 -2
- cartography/models/entra/app_role_assignment.py +115 -0
- cartography/models/entra/application.py +47 -0
- cartography/models/entra/group.py +91 -0
- cartography/models/kubernetes/__init__.py +0 -0
- cartography/models/kubernetes/clusters.py +26 -0
- cartography/models/kubernetes/containers.py +108 -0
- cartography/models/kubernetes/namespaces.py +51 -0
- cartography/models/kubernetes/pods.py +80 -0
- cartography/models/kubernetes/secrets.py +79 -0
- cartography/models/kubernetes/services.py +108 -0
- cartography/models/trivy/__init__.py +0 -0
- cartography/models/trivy/findings.py +66 -0
- cartography/models/trivy/fix.py +66 -0
- cartography/models/trivy/package.py +71 -0
- cartography/sync.py +2 -0
- cartography/util.py +15 -10
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/METADATA +3 -2
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/RECORD +74 -40
- cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/WHEEL +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,90 +1,154 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import logging
|
|
2
|
-
from typing import
|
|
3
|
-
from typing import List
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
import neo4j
|
|
6
|
+
from kubernetes.client.models import V1LoadBalancerIngress
|
|
7
|
+
from kubernetes.client.models import V1PortStatus
|
|
8
|
+
from kubernetes.client.models import V1Service
|
|
6
9
|
|
|
10
|
+
from cartography.client.core.tx import load
|
|
11
|
+
from cartography.graph.job import GraphJob
|
|
7
12
|
from cartography.intel.kubernetes.util import get_epoch
|
|
13
|
+
from cartography.intel.kubernetes.util import k8s_paginate
|
|
8
14
|
from cartography.intel.kubernetes.util import K8sClient
|
|
15
|
+
from cartography.models.kubernetes.services import KubernetesServiceSchema
|
|
9
16
|
from cartography.util import timeit
|
|
10
17
|
|
|
11
18
|
logger = logging.getLogger(__name__)
|
|
12
19
|
|
|
13
20
|
|
|
14
21
|
@timeit
def get_services(client: K8sClient) -> list[V1Service]:
    """Return every Kubernetes service across all namespaces, following API pagination."""
    return k8s_paginate(client.core.list_service_for_all_namespaces)
|
|
24
25
|
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
def _format_service_selector(selector: dict[str, str]) -> str:
|
|
28
|
+
return json.dumps(selector)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _format_load_balancer_ingress(ingress: list[V1LoadBalancerIngress] | None) -> str:
    """Serialize a LoadBalancer service's ingress status to a JSON string.

    Returns the JSON string "null" when the service has no ingress status yet
    (e.g. the load balancer is still provisioning).
    """

    def _format_ingress_ports(
        ports: list[V1PortStatus] | None,
    ) -> list[dict[str, Any]] | None:
        # Each V1PortStatus carries `error`, `port`, and `protocol` fields.
        # BUGFIX: the keys and values were previously mismatched
        # ("error": port.port, "port": port.protocol, "protocol": port.ip);
        # V1PortStatus has no `ip` attribute at all.
        if ports is None:
            return None

        ingress_ports = []
        for port in ports:
            ingress_ports.append(
                {
                    "error": port.error,
                    "port": port.port,
                    "protocol": port.protocol,
                }
            )
        return ingress_ports

    if ingress is None:
        return json.dumps(None)

    loadbalancer_ingress = []
    for item in ingress:
        loadbalancer_ingress.append(
            {
                "hostname": item.hostname,
                "ip": item.ip,
                "ip_mode": item.ip_mode,
                "ports": _format_ingress_ports(item.ports),
            }
        )
    return json.dumps(loadbalancer_ingress)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def transform_services(
    services: list[V1Service], all_pods: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Shape raw V1Service objects into dicts matching the KubernetesService schema.

    For each service this also resolves the pods that back it: a pod matches
    when it is in the same namespace and its labels satisfy every key/value
    pair of the service's label selector.
    """
    services_list = []
    for service in services:
        meta = service.metadata
        spec = service.spec
        item = {
            "uid": meta.uid,
            "name": meta.name,
            "creation_timestamp": get_epoch(meta.creation_timestamp),
            "deletion_timestamp": get_epoch(meta.deletion_timestamp),
            "namespace": meta.namespace,
            "type": spec.type,
            "selector": _format_service_selector(spec.selector),
            "cluster_ip": spec.cluster_ip,
            "load_balancer_ip": spec.load_balancer_ip,
        }

        # TODO: instead of storing a json string, we should probably create separate nodes for each ingress
        if spec.type == "LoadBalancer" and service.status.load_balancer:
            item["load_balancer_ingress"] = _format_load_balancer_ingress(
                service.status.load_balancer.ingress
            )

        # Collect uids of pods whose labels match this service's selector.
        pod_ids = []
        service_selector: dict[str, str] | None = spec.selector
        for pod in all_pods:
            if pod["namespace"] != meta.namespace:
                continue
            # Pod labels are stored as a JSON string on the pod dict.
            pod_labels: dict[str, str] | None = json.loads(pod["labels"])
            if pod_labels and service_selector:
                if all(
                    service_selector[key] == pod_labels.get(key)
                    for key in service_selector
                ):
                    pod_ids.append(pod["uid"])
        item["pod_ids"] = pod_ids

        services_list.append(item)
    return services_list
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def load_services(
    session: neo4j.Session,
    services: list[dict[str, Any]],
    update_tag: int,
    cluster_id: str,
    cluster_name: str,
) -> None:
    """Ingest transformed service dicts into the graph, attached to the given cluster."""
    logger.info(f"Loading {len(services)} KubernetesServices")
    load(
        session,
        KubernetesServiceSchema(),
        services,
        lastupdated=update_tag,
        CLUSTER_ID=cluster_id,
        CLUSTER_NAME=cluster_name,
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def cleanup(session: neo4j.Session, common_job_parameters: dict[str, Any]) -> None:
    """Remove stale KubernetesService data left over from previous syncs."""
    logger.debug("Running cleanup job for KubernetesService")
    job = GraphJob.from_node_schema(KubernetesServiceSchema(), common_job_parameters)
    job.run(session)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@timeit
def sync_services(
    session: neo4j.Session,
    client: K8sClient,
    all_pods: list[dict[str, Any]],
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """Full get -> transform -> load -> cleanup pipeline for Kubernetes services."""
    raw_services = get_services(client)
    transformed = transform_services(raw_services, all_pods)
    load_services(
        session=session,
        services=transformed,
        update_tag=update_tag,
        cluster_id=common_job_parameters["CLUSTER_ID"],
        cluster_name=client.name,
    )
    cleanup(session, common_job_parameters)
|
|
@@ -1,11 +1,16 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from datetime import datetime
|
|
2
|
-
from typing import
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any
|
|
4
|
+
from typing import Callable
|
|
4
5
|
|
|
5
6
|
from kubernetes import config
|
|
6
7
|
from kubernetes.client import ApiClient
|
|
7
8
|
from kubernetes.client import CoreV1Api
|
|
8
9
|
from kubernetes.client import NetworkingV1Api
|
|
10
|
+
from kubernetes.client import VersionApi
|
|
11
|
+
from kubernetes.client.exceptions import ApiException
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
9
14
|
|
|
10
15
|
|
|
11
16
|
class KubernetesContextNotFound(Exception):
|
|
@@ -13,39 +18,145 @@ class KubernetesContextNotFound(Exception):
|
|
|
13
18
|
|
|
14
19
|
|
|
15
20
|
class K8CoreApiClient(CoreV1Api):
    """CoreV1Api bound to a named kubeconfig context."""

    def __init__(
        self,
        name: str,
        config_file: str,
        api_client: ApiClient | None = None,
    ) -> None:
        # Build a client for the named context unless one was injected (tests).
        if not api_client:
            api_client = config.new_client_from_config(
                context=name, config_file=config_file
            )
        self.name = name
        super().__init__(api_client=api_client)
|
|
21
33
|
|
|
22
34
|
|
|
23
35
|
class K8NetworkingApiClient(NetworkingV1Api):
    """NetworkingV1Api bound to a named kubeconfig context."""

    def __init__(
        self,
        name: str,
        config_file: str,
        api_client: ApiClient | None = None,
    ) -> None:
        # Build a client for the named context unless one was injected (tests).
        if not api_client:
            api_client = config.new_client_from_config(
                context=name, config_file=config_file
            )
        self.name = name
        super().__init__(api_client=api_client)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class K8VersionApiClient(VersionApi):
    """VersionApi bound to a named kubeconfig context."""

    def __init__(
        self,
        name: str,
        config_file: str,
        api_client: ApiClient | None = None,
    ) -> None:
        # Build a client for the named context unless one was injected (tests).
        if not api_client:
            api_client = config.new_client_from_config(
                context=name, config_file=config_file
            )
        self.name = name
        super().__init__(api_client=api_client)
|
|
29
63
|
|
|
30
64
|
|
|
31
65
|
class K8sClient:
    """Bundle of Kubernetes API clients for a single kubeconfig context.

    Exposes `core`, `networking`, and `version` API clients, all bound to the
    same context of the same kubeconfig file.
    """

    def __init__(
        self,
        name: str,
        config_file: str,
        external_id: str | None = None,
    ) -> None:
        self.name = name
        self.config_file = config_file
        # presumably the cluster name from the kubeconfig context — TODO confirm
        self.external_id = external_id
        self.core = K8CoreApiClient(self.name, self.config_file)
        self.networking = K8NetworkingApiClient(self.name, self.config_file)
        self.version = K8VersionApiClient(self.name, self.config_file)
|
|
36
78
|
|
|
37
79
|
|
|
38
|
-
def get_k8s_clients(kubeconfig: str) -> list[K8sClient]:
    """Build one K8sClient per context found in the kubeconfig file.

    Raises KubernetesContextNotFound when the kubeconfig defines no contexts.
    """
    # list_kube_config_contexts returns a tuple of (all contexts, current context)
    contexts, _ = config.list_kube_config_contexts(kubeconfig)
    if not contexts:
        raise KubernetesContextNotFound("No context found in kubeconfig.")

    return [
        K8sClient(
            ctx["name"],
            kubeconfig,
            external_id=ctx["context"].get("cluster"),
        )
        for ctx in contexts
    ]
|
|
46
96
|
|
|
47
97
|
|
|
48
|
-
def get_epoch(date: datetime) ->
|
|
98
|
+
def get_epoch(date: datetime | None) -> int | None:
|
|
49
99
|
if date:
|
|
50
|
-
return int(date.
|
|
100
|
+
return int(date.timestamp())
|
|
51
101
|
return None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def k8s_paginate(
    list_func: Callable,
    **kwargs: Any,
) -> list[dict[str, Any]]:
    """
    Handles pagination for a Kubernetes API call.

    :param list_func: The list function to call (e.g. client.core.list_pod_for_all_namespaces)
    :param kwargs: Keyword arguments to pass to the list function (e.g. limit=100)
    :return: A list of all resources returned by the list function
    """
    all_resources: list = []
    continue_token = None
    limit = kwargs.pop("limit", 100)
    function_name = list_func.__name__

    logger.debug(f"Starting pagination for {function_name} with limit {limit}.")

    while True:
        try:
            # Pass the continue token only once the server has handed one out.
            call_kwargs = dict(kwargs, limit=limit)
            if continue_token:
                call_kwargs["_continue"] = continue_token
            response = list_func(**call_kwargs)

            # Defensive: stop (rather than crash) on a malformed response.
            if not hasattr(response, "items"):
                logger.warning(
                    f"Response from {function_name} does not contain 'items' attribute."
                )
                break

            items_count = len(response.items)
            all_resources.extend(response.items)

            logger.debug(f"Retrieved {items_count} {function_name} resources")

            if not hasattr(response, "metadata"):
                logger.warning(
                    f"Response from {function_name} does not contain 'metadata' attribute."
                )
                break

            # The server returns an opaque continue token while more pages remain.
            continue_token = response.metadata._continue
            if not continue_token:
                logger.debug(f"No more {function_name} resources to retrieve.")
                break

        except ApiException as e:
            # Best-effort: keep whatever pages were already collected.
            logger.error(
                f"Kubernetes API error retrieving {function_name} resources. {e}: {e.status} - {e.reason}"
            )
            break

    logger.debug(
        f"Completed pagination for {function_name}: retrieved {len(all_resources)} resources"
    )
    return all_resources
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import boto3
|
|
5
|
+
from neo4j import Session
|
|
6
|
+
|
|
7
|
+
from cartography.client.aws import list_accounts
|
|
8
|
+
from cartography.client.aws.ecr import get_ecr_images
|
|
9
|
+
from cartography.config import Config
|
|
10
|
+
from cartography.intel.trivy.scanner import cleanup
|
|
11
|
+
from cartography.intel.trivy.scanner import get_json_files_in_s3
|
|
12
|
+
from cartography.intel.trivy.scanner import sync_single_image_from_s3
|
|
13
|
+
from cartography.stats import get_stats_client
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
stat_handler = get_stats_client("trivy.scanner")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@timeit
def get_scan_targets(
    neo4j_session: Session,
    account_ids: list[str] | None = None,
) -> set[str]:
    """
    Return the set of ECR image URIs known to the graph.

    When account_ids is not given, every AWS account in the graph is queried.
    """
    aws_accounts = account_ids if account_ids else list_accounts(neo4j_session)

    ecr_images: set[str] = set()
    for account_id in aws_accounts:
        # get_ecr_images yields 5-tuples; only the image URI (3rd field) is needed here.
        for _, _, image_uri, _, _ in get_ecr_images(neo4j_session, account_id):
            ecr_images.add(image_uri)

    return ecr_images
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_intersection(
|
|
42
|
+
images_in_graph: set[str], json_files: set[str], trivy_s3_prefix: str
|
|
43
|
+
) -> list[tuple[str, str]]:
|
|
44
|
+
"""
|
|
45
|
+
Get the intersection of ECR images in the graph and S3 scan results.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
images_in_graph: Set of ECR images in the graph
|
|
49
|
+
json_files: Set of S3 object keys for JSON files
|
|
50
|
+
trivy_s3_prefix: S3 prefix path containing scan results
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
List of tuples (image_uri, s3_object_key)
|
|
54
|
+
"""
|
|
55
|
+
intersection = []
|
|
56
|
+
prefix_len = len(trivy_s3_prefix)
|
|
57
|
+
for s3_object_key in json_files:
|
|
58
|
+
# Sample key "123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
|
|
59
|
+
# Sample key "folder/derp/123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
|
|
60
|
+
# Remove the prefix and the .json suffix
|
|
61
|
+
image_uri = s3_object_key[prefix_len:-5]
|
|
62
|
+
|
|
63
|
+
if image_uri in images_in_graph:
|
|
64
|
+
intersection.append((image_uri, s3_object_key))
|
|
65
|
+
|
|
66
|
+
return intersection
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@timeit
def sync_trivy_aws_ecr_from_s3(
    neo4j_session: Session,
    trivy_s3_bucket: str,
    trivy_s3_prefix: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
    boto3_session: boto3.Session,
) -> None:
    """
    Sync Trivy scan results from S3 for AWS ECR images.

    Args:
        neo4j_session: Neo4j session for database operations
        trivy_s3_bucket: S3 bucket containing scan results
        trivy_s3_prefix: S3 prefix path containing scan results
        update_tag: Update tag for tracking
        common_job_parameters: Common job parameters for cleanup
        boto3_session: boto3 session for S3 operations
    """
    logger.info(
        f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
    )

    images_in_graph: set[str] = get_scan_targets(neo4j_session)
    json_files: set[str] = get_json_files_in_s3(
        trivy_s3_bucket, trivy_s3_prefix, boto3_session
    )
    intersection: list[tuple[str, str]] = _get_intersection(
        images_in_graph, json_files, trivy_s3_prefix
    )

    # Refuse to continue when nothing matches: running the cleanup step with
    # zero ingested results would wipe previously-loaded findings.
    if not intersection:
        logger.error(
            f"Trivy sync was configured, but there are no ECR images with S3 json scan results in bucket "
            f"'{trivy_s3_bucket}' with prefix '{trivy_s3_prefix}'. "
            "Skipping Trivy sync to avoid potential data loss. "
            "Please check the S3 bucket and prefix configuration. We expect the json files in s3 to be named "
            f"`<image_uri>.json` and to be in the same bucket and prefix as the scan results. If the prefix is "
            "a folder, it MUST end with a trailing slash '/'. "
        )
        logger.error(f"JSON files in S3: {json_files}")
        raise ValueError("No ECR images with S3 json scan results found.")

    logger.info(f"Processing {len(intersection)} ECR images with S3 scan results")
    for image_uri, s3_object_key in intersection:
        sync_single_image_from_s3(
            neo4j_session,
            image_uri,
            update_tag,
            trivy_s3_bucket,
            s3_object_key,
            boto3_session,
        )

    cleanup(neo4j_session, common_job_parameters)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@timeit
def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
    """
    Start Trivy scan ingestion from S3.

    Args:
        neo4j_session: Neo4j session for database operations
        config: Configuration object containing S3 settings
    """
    # Without an S3 bucket there is nothing to ingest.
    if not config.trivy_s3_bucket:
        logger.info("Trivy S3 configuration not provided. Skipping Trivy ingestion.")
        return

    # Default to empty string if s3 prefix is not provided
    if config.trivy_s3_prefix is None:
        config.trivy_s3_prefix = ""

    common_job_parameters = {"UPDATE_TAG": config.update_tag}

    sync_trivy_aws_ecr_from_s3(
        neo4j_session,
        config.trivy_s3_bucket,
        config.trivy_s3_prefix,
        config.update_tag,
        common_job_parameters,
        boto3.Session(),
    )

    # Support other Trivy resource types here e.g. if Google Cloud has images.
|