PyPI - cartography - Versions diffs - 0.107.0rc3__py3-none-any.whl → 0.108.0__py3-none-any.whl - Mend

cartography 0.107.0rc3__py3-none-any.whl → 0.108.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cartography might be problematic. Click here for more details.

Files changed (47) hide show

cartography/_version.py +2 -2
cartography/cli.py +10 -0
cartography/config.py +5 -0
cartography/data/indexes.cypher +0 -10
cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
cartography/intel/aws/__init__.py +1 -0
cartography/intel/aws/cloudtrail.py +17 -4
cartography/intel/aws/cloudtrail_management_events.py +560 -16
cartography/intel/aws/cloudwatch.py +73 -4
cartography/intel/aws/ec2/security_groups.py +140 -122
cartography/intel/aws/ec2/snapshots.py +47 -84
cartography/intel/aws/ec2/subnets.py +37 -63
cartography/intel/aws/ecr.py +55 -80
cartography/intel/aws/elasticache.py +102 -79
cartography/intel/aws/guardduty.py +275 -0
cartography/intel/aws/resources.py +2 -0
cartography/intel/aws/secretsmanager.py +62 -44
cartography/intel/github/repos.py +370 -28
cartography/models/aws/cloudtrail/management_events.py +95 -6
cartography/models/aws/cloudtrail/trail.py +21 -0
cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
cartography/models/aws/ec2/security_group_rules.py +109 -0
cartography/models/aws/ec2/security_groups.py +90 -0
cartography/models/aws/ec2/snapshots.py +58 -0
cartography/models/aws/ec2/subnets.py +65 -0
cartography/models/aws/ec2/volumes.py +20 -0
cartography/models/aws/ecr/__init__.py +0 -0
cartography/models/aws/ecr/image.py +41 -0
cartography/models/aws/ecr/repository.py +72 -0
cartography/models/aws/ecr/repository_image.py +95 -0
cartography/models/aws/elasticache/__init__.py +0 -0
cartography/models/aws/elasticache/cluster.py +65 -0
cartography/models/aws/elasticache/topic.py +67 -0
cartography/models/aws/guardduty/__init__.py +1 -0
cartography/models/aws/guardduty/findings.py +102 -0
cartography/models/aws/secretsmanager/secret.py +106 -0
cartography/models/github/dependencies.py +74 -0
cartography/models/github/manifests.py +49 -0
{cartography-0.107.0rc3.dist-info → cartography-0.108.0.dist-info}/METADATA +3 -3
{cartography-0.107.0rc3.dist-info → cartography-0.108.0.dist-info}/RECORD +44 -29
cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
{cartography-0.107.0rc3.dist-info → cartography-0.108.0.dist-info}/WHEEL +0 -0
{cartography-0.107.0rc3.dist-info → cartography-0.108.0.dist-info}/entry_points.txt +0 -0
{cartography-0.107.0rc3.dist-info → cartography-0.108.0.dist-info}/licenses/LICENSE +0 -0
{cartography-0.107.0rc3.dist-info → cartography-0.108.0.dist-info}/top_level.txt +0 -0

cartography/intel/aws/guardduty.py ADDED Viewed

@@ -0,0 +1,275 @@
+import logging
+from typing import Any
+from typing import Dict
+from typing import List
+import boto3
+import boto3.session
+import neo4j
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.aws.guardduty.findings import GuardDutyFindingSchema
+from cartography.stats import get_stats_client
+from cartography.util import aws_handle_regions
+from cartography.util import aws_paginate
+from cartography.util import merge_module_sync_metadata
+from cartography.util import timeit
+logger = logging.getLogger(__name__)
+stat_handler = get_stats_client(__name__)
+def _get_severity_range_for_threshold(
+    severity_threshold: str | None,
+) -> List[str] | None:
+    """
+    Convert severity threshold string to GuardDuty numeric severity range.
+    GuardDuty severity mappings:
+    - LOW: 1.0-3.9
+    - MEDIUM: 4.0-6.9
+    - HIGH: 7.0-8.9
+    - CRITICAL: 9.0-10.0
+    :param severity_threshold: Severity threshold (LOW, MEDIUM, HIGH, CRITICAL)
+    :return: List of numeric severity ranges to include, or None for no filtering
+    """
+    if not severity_threshold:
+        return None
+    threshold_upper = severity_threshold.upper().strip()
+    # Map threshold to numeric ranges - include threshold level and above
+    if threshold_upper == "LOW":
+        return ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]  # All severities
+    elif threshold_upper == "MEDIUM":
+        return ["4", "5", "6", "7", "8", "9", "10"]  # MEDIUM and above
+    elif threshold_upper == "HIGH":
+        return ["7", "8", "9", "10"]  # HIGH and CRITICAL only
+    elif threshold_upper == "CRITICAL":
+        return ["9", "10"]  # CRITICAL only
+    else:
+        return None
@aws_handle_regions
def get_detectors(
    boto3_session: boto3.session.Session,
    region: str,
) -> List[str]:
    """
    Return the GuardDuty detector IDs that list_detectors reports for one region.

    :param boto3_session: Session used to create the regional GuardDuty client.
    :param region: AWS region to query.
    :return: The response's "DetectorIds" value, or an empty list when that
        value is missing or empty.
    """
    guardduty = boto3_session.client("guardduty", region_name=region)
    found = guardduty.list_detectors().get("DetectorIds", [])
    if found:
        logger.info(f"Found {len(found)} GuardDuty detectors in region {region}")
        return found
    logger.info(f"No GuardDuty detectors found in region {region}")
    return []
@aws_handle_regions
@timeit
def get_findings(
    boto3_session: boto3.session.Session,
    region: str,
    detector_id: str,
    severity_threshold: str | None = None,
) -> List[Dict[str, Any]]:
    """
    Fetch the unarchived GuardDuty findings of one detector.

    Finding IDs are listed first (paginated through aws_paginate) and the full
    finding documents are then fetched in batches.

    :param boto3_session: Session used to create the regional GuardDuty client.
    :param region: AWS region the detector lives in.
    :param detector_id: Detector whose findings are fetched.
    :param severity_threshold: Optional LOW/MEDIUM/HIGH/CRITICAL name; when it
        resolves to a severity range, only findings at or above the lowest
        level of that range are requested.
    :return: The finding dicts returned by get_findings, or an empty list when
        no finding ID matches the criteria.
    """
    guardduty = boto3_session.client("guardduty", region_name=region)

    # Archived findings are always excluded.
    finding_criteria = {"service.archived": {"Equals": ["false"]}}
    allowed_levels = _get_severity_range_for_threshold(severity_threshold)
    if allowed_levels:
        lowest_level = min(float(level) for level in allowed_levels)
        # Criterion values are not uniformly typed across fields, hence the
        # type-checker suppression on this assignment.
        finding_criteria["severity"] = {"GreaterThanOrEqual": int(lowest_level)}  # type: ignore

    id_pages = aws_paginate(
        guardduty,
        "list_findings",
        "FindingIds",
        DetectorId=detector_id,
        FindingCriteria={"Criterion": finding_criteria},
    )
    finding_ids = list(id_pages)
    if not finding_ids:
        logger.info(f"No findings found for detector {detector_id} in region {region}")
        return []

    # The GuardDuty API limit is 50 finding IDs per get_findings call.
    chunk_size = 50
    collected = []
    for start in range(0, len(finding_ids), chunk_size):
        response = guardduty.get_findings(
            DetectorId=detector_id,
            FindingIds=finding_ids[start : start + chunk_size],
        )
        collected.extend(response.get("Findings", []))

    logger.info(
        f"Retrieved {len(collected)} findings for detector {detector_id} in region {region}"
    )
    return collected
def _top_level_or_service(finding: Dict[str, Any], key: str) -> Any:
    """Return finding[key] when present and not None, else the same key from finding["Service"]."""
    value = finding.get(key)
    if value is not None:
        return value
    return (finding.get("Service") or {}).get(key)


def transform_findings(findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Transform GuardDuty findings from the API response into the flat schema format.

    The GuardDuty GetFindings response nests EventFirstSeen, EventLastSeen,
    DetectorId and Archived inside the finding's "Service" object. Each of
    those is read from the top level first (kept for backward compatibility
    with callers that pass pre-flattened findings) and then from "Service".

    Every returned dict has the same keys. "resource_id" is the instance ID
    for "Instance" resources, the name of the first listed bucket for
    "S3Bucket" resources, and None otherwise (including an S3Bucket finding
    with no bucket details).

    :param findings: Finding dicts as returned by get_findings.
    :return: One flat dict per input finding, in input order.
    :raises KeyError: If a finding has no "Id".
    """
    transformed: List[Dict[str, Any]] = []
    for f in findings:
        item: Dict[str, Any] = {
            "id": f["Id"],
            "arn": f.get("Arn"),
            "type": f.get("Type"),
            "severity": f.get("Severity"),
            "title": f.get("Title"),
            "description": f.get("Description"),
            "confidence": f.get("Confidence"),
            "eventfirstseen": _top_level_or_service(f, "EventFirstSeen"),
            "eventlastseen": _top_level_or_service(f, "EventLastSeen"),
            "accountid": f.get("AccountId"),
            "region": f.get("Region"),
            "detectorid": _top_level_or_service(f, "DetectorId"),
            "archived": _top_level_or_service(f, "Archived"),
        }

        # `or {}` also covers an explicit None, which `.get(key, {})` would not.
        resource = f.get("Resource") or {}
        resource_type = resource.get("ResourceType")
        item["resource_type"] = resource_type

        # Default first so the key is always present, whatever branch is taken.
        resource_id = None
        if resource_type == "Instance":
            resource_id = (resource.get("InstanceDetails") or {}).get("InstanceId")
        elif resource_type == "S3Bucket":
            buckets = resource.get("S3BucketDetails") or []
            if buckets:
                resource_id = buckets[0].get("Name")
        item["resource_id"] = resource_id

        transformed.append(item)
    return transformed
@timeit
def load_guardduty_findings(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    region: str,
    aws_account_id: str,
    update_tag: int,
) -> None:
    """
    Write transformed GuardDuty findings to the graph using GuardDutyFindingSchema.

    :param neo4j_session: Session the load runs against.
    :param data: Finding dicts to load.
    :param region: Passed to the loader as ``Region``.
    :param aws_account_id: Passed to the loader as ``AWS_ID``.
    :param update_tag: Passed to the loader as ``lastupdated``.
    """
    logger.info(
        f"Loading {len(data)} GuardDuty findings for region {region} into graph."
    )
    finding_schema = GuardDutyFindingSchema()
    loader_kwargs = {
        "lastupdated": update_tag,
        "Region": region,
        "AWS_ID": aws_account_id,
    }
    load(neo4j_session, finding_schema, data, **loader_kwargs)
@timeit
def cleanup_guardduty(
    neo4j_session: neo4j.Session, common_job_parameters: Dict
) -> None:
    """
    Build the cleanup job derived from GuardDutyFindingSchema and run it.

    :param neo4j_session: Session the cleanup job runs against.
    :param common_job_parameters: Parameters handed to GraphJob.from_node_schema.
    """
    logger.debug("Running GuardDuty cleanup job.")
    finding_schema = GuardDutyFindingSchema()
    GraphJob.from_node_schema(finding_schema, common_job_parameters).run(
        neo4j_session
    )
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Sync GuardDuty findings for every given region, then clean up and record sync metadata.

    For each region the detectors are listed, the findings of every detector
    are fetched, and the combined findings are transformed and loaded. Regions
    without detectors are skipped. The cleanup job and the module sync
    metadata update run once, after all regions have been processed.

    The optional severity filter is read from
    ``common_job_parameters["aws_guardduty_severity_threshold"]``.
    """
    threshold = common_job_parameters.get("aws_guardduty_severity_threshold")

    for region in regions:
        logger.info(
            f"Syncing GuardDuty findings for {region} in account {current_aws_account_id}"
        )
        detectors = get_detectors(boto3_session, region)
        if not detectors:
            logger.info(f"No GuardDuty detectors found in region {region}, skipping.")
            continue

        # Findings of all detectors in the region are loaded in a single batch.
        raw_findings = [
            finding
            for detector_id in detectors
            for finding in get_findings(boto3_session, region, detector_id, threshold)
        ]
        load_guardduty_findings(
            neo4j_session,
            transform_findings(raw_findings),
            region,
            current_aws_account_id,
            update_tag,
        )

    # Runs once for the whole account, not per region.
    cleanup_guardduty(neo4j_session, common_job_parameters)
    merge_module_sync_metadata(
        neo4j_session,
        group_type="AWSAccount",
        group_id=current_aws_account_id,
        synced_type="GuardDutyFinding",
        update_tag=update_tag,
        stat_handler=stat_handler,
    )

cartography/intel/aws/resources.py CHANGED Viewed

@@ -18,6 +18,7 @@ from . import eks
 from . import elasticache
 from . import elasticsearch
 from . import emr
+from . import guardduty
 from . import iam
 from . import identitycenter
 from . import inspector
@@ -111,5 +112,6 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
     "cloudtrail_management_events": cloudtrail_management_events.sync,
     "cloudwatch": cloudwatch.sync,
     "efs": efs.sync,
+    "guardduty": guardduty.sync,
     "codebuild": codebuild.sync,
 }

cartography/intel/aws/secretsmanager.py CHANGED Viewed

@@ -7,6 +7,7 @@ import neo4j
 from cartography.client.core.tx import load
 from cartography.graph.job import GraphJob
+from cartography.models.aws.secretsmanager.secret import SecretsManagerSecretSchema
 from cartography.models.aws.secretsmanager.secret_version import (
     SecretsManagerSecretVersionSchema,
 )
@@ -14,7 +15,6 @@ from cartography.stats import get_stats_client
 from cartography.util import aws_handle_regions
 from cartography.util import dict_date_to_epoch
 from cartography.util import merge_module_sync_metadata
-from cartography.util import run_cleanup_job
 from cartography.util import timeit
 logger = logging.getLogger(__name__)
@@ -32,6 +32,37 @@ def get_secret_list(boto3_session: boto3.session.Session, region: str) -> List[D
     return secrets
def transform_secrets(
    secrets: List[Dict],
) -> List[Dict]:
    """
    Reshape AWS Secrets Manager secrets to match the data model.

    Each output entry is a shallow copy of the input secret in which the five
    date fields hold the result of dict_date_to_epoch for that field. When
    RotationRules carries AutomaticallyAfterDays, its value is also exposed as
    the top-level key RotationRulesAutomaticallyAfterDays.

    :param secrets: Secret dicts to transform.
    :return: One transformed dict per input secret, in input order.
    """
    date_fields = (
        "CreatedDate",
        "LastRotatedDate",
        "LastChangedDate",
        "LastAccessedDate",
        "DeletedDate",
    )
    results = []
    for secret in secrets:
        # All writes go to the copy, never to the caller's dict.
        entry = dict(secret)
        for field in date_fields:
            entry[field] = dict_date_to_epoch(secret, field)
        # Flatten the nested rotation interval into a top-level key.
        rules = secret.get("RotationRules")
        if rules and "AutomaticallyAfterDays" in rules:
            entry["RotationRulesAutomaticallyAfterDays"] = rules[
                "AutomaticallyAfterDays"
            ]
        results.append(entry)
    return results
 @timeit
 def load_secrets(
     neo4j_session: neo4j.Session,
@@ -40,48 +71,33 @@ def load_secrets(
     current_aws_account_id: str,
     aws_update_tag: int,
 ) -> None:
-    ingest_secrets = """
-    UNWIND $Secrets as secret
-        MERGE (s:SecretsManagerSecret{id: secret.ARN})
-        ON CREATE SET s.firstseen = timestamp()
-        SET s.name = secret.Name, s.arn = secret.ARN, s.description = secret.Description,
-            s.kms_key_id = secret.KmsKeyId, s.rotation_enabled = secret.RotationEnabled,
-            s.rotation_lambda_arn = secret.RotationLambdaARN,
-            s.rotation_rules_automatically_after_days = secret.RotationRules.AutomaticallyAfterDays,
-            s.last_rotated_date = secret.LastRotatedDate, s.last_changed_date = secret.LastChangedDate,
-            s.last_accessed_date = secret.LastAccessedDate, s.deleted_date = secret.DeletedDate,
-            s.owning_service = secret.OwningService, s.created_date = secret.CreatedDate,
-            s.primary_region = secret.PrimaryRegion, s.region = $Region,
-            s.lastupdated = $aws_update_tag
-        WITH s
-        MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
-        MERGE (owner)-[r:RESOURCE]->(s)
-        ON CREATE SET r.firstseen = timestamp()
-        SET r.lastupdated = $aws_update_tag
-    """
-    for secret in data:
-        secret["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
-        secret["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
-        secret["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
-        secret["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
-        secret["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
-    neo4j_session.run(
-        ingest_secrets,
-        Secrets=data,
+    """
+    Load transformed secrets into Neo4j using the data model.
+    Expects data to already be transformed by transform_secrets().
+    """
+    logger.info(f"Loading {len(data)} Secrets for region {region} into graph.")
+    # Load using the schema-based approach
+    load(
+        neo4j_session,
+        SecretsManagerSecretSchema(),
+        data,
+        lastupdated=aws_update_tag,
         Region=region,
-        AWS_ACCOUNT_ID=current_aws_account_id,
-        aws_update_tag=aws_update_tag,
+        AWS_ID=current_aws_account_id,
     )
 @timeit
 def cleanup_secrets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
-    run_cleanup_job(
-        "aws_import_secrets_cleanup.json",
-        neo4j_session,
-        common_job_parameters,
+    """
+    Run Secrets cleanup job using the data model.
+    """
+    logger.debug("Running Secrets cleanup job.")
+    cleanup_job = GraphJob.from_node_schema(
+        SecretsManagerSecretSchema(), common_job_parameters
     )
+    cleanup_job.run(neo4j_session)
 @timeit
@@ -121,8 +137,6 @@ def get_secret_versions(
 def transform_secret_versions(
     versions: List[Dict],
-    region: str,
-    aws_account_id: str,
 ) -> List[Dict]:
     """
     Transform AWS Secrets Manager Secret Versions to match the data model.
@@ -203,7 +217,15 @@ def sync(
         )
         secrets = get_secret_list(boto3_session, region)
-        load_secrets(neo4j_session, secrets, region, current_aws_account_id, update_tag)
+        transformed_secrets = transform_secrets(secrets)
+        load_secrets(
+            neo4j_session,
+            transformed_secrets,
+            region,
+            current_aws_account_id,
+            update_tag,
+        )
         all_versions = []
         for secret in secrets:
@@ -216,11 +238,7 @@ def sync(
             )
             all_versions.extend(versions)
-        transformed_data = transform_secret_versions(
-            all_versions,
-            region,
-            current_aws_account_id,
-        )
+        transformed_data = transform_secret_versions(all_versions)
         load_secret_versions(
             neo4j_session,

cartography 0.107.0rc3__py3-none-any.whl → 0.108.0__py3-none-any.whl

Potentially problematic release.

cartography 0.107.0rc3__py3-none-any.whl → 0.108.0__py3-none-any.whl