PyPI - cartography - Versions diffs - 0.113.0__py3-none-any.whl → 0.114.0__py3-none-any.whl - Mend

cartography 0.113.0 (py3-none-any.whl) → 0.114.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cartography might be problematic. Click here for more details.

Files changed (69) hide show

cartography/_version.py +2 -2
cartography/cli.py +8 -0
cartography/config.py +4 -0
cartography/data/indexes.cypher +0 -27
cartography/intel/aws/iam.py +741 -492
cartography/intel/aws/organizations.py +7 -8
cartography/intel/aws/permission_relationships.py +4 -16
cartography/intel/azure/__init__.py +16 -0
cartography/intel/azure/app_service.py +105 -0
cartography/intel/azure/functions.py +124 -0
cartography/intel/entra/__init__.py +31 -0
cartography/intel/entra/app_role_assignments.py +277 -0
cartography/intel/entra/applications.py +4 -238
cartography/intel/entra/federation/__init__.py +0 -0
cartography/intel/entra/federation/aws_identity_center.py +77 -0
cartography/intel/entra/service_principals.py +217 -0
cartography/intel/gcp/__init__.py +136 -440
cartography/intel/gcp/clients.py +65 -0
cartography/intel/gcp/compute.py +18 -44
cartography/intel/gcp/crm/__init__.py +0 -0
cartography/intel/gcp/crm/folders.py +108 -0
cartography/intel/gcp/crm/orgs.py +65 -0
cartography/intel/gcp/crm/projects.py +109 -0
cartography/intel/gcp/gke.py +72 -113
cartography/intel/github/__init__.py +41 -0
cartography/intel/github/commits.py +423 -0
cartography/intel/github/repos.py +73 -39
cartography/models/aws/iam/access_key.py +103 -0
cartography/models/aws/iam/account_role.py +24 -0
cartography/models/aws/iam/federated_principal.py +60 -0
cartography/models/aws/iam/group.py +60 -0
cartography/models/aws/iam/group_membership.py +26 -0
cartography/models/aws/iam/inline_policy.py +78 -0
cartography/models/aws/iam/managed_policy.py +51 -0
cartography/models/aws/iam/policy_statement.py +57 -0
cartography/models/aws/iam/role.py +83 -0
cartography/models/aws/iam/root_principal.py +52 -0
cartography/models/aws/iam/service_principal.py +30 -0
cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
cartography/models/aws/iam/user.py +54 -0
cartography/models/azure/__init__.py +0 -0
cartography/models/azure/app_service.py +59 -0
cartography/models/azure/function_app.py +59 -0
cartography/models/entra/entra_user_to_aws_sso.py +41 -0
cartography/models/entra/service_principal.py +104 -0
cartography/models/gcp/compute/subnet.py +74 -0
cartography/models/gcp/crm/__init__.py +0 -0
cartography/models/gcp/crm/folders.py +98 -0
cartography/models/gcp/crm/organizations.py +21 -0
cartography/models/gcp/crm/projects.py +100 -0
cartography/models/gcp/gke.py +69 -0
cartography/models/github/commits.py +63 -0
{cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
{cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/RECORD +58 -32
cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
cartography/intel/gcp/crm.py +0 -355
{cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
{cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
{cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
{cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0

cartography/intel/aws/organizations.py CHANGED Viewed

@@ -5,6 +5,7 @@ import boto3
 import botocore.exceptions
 import neo4j
+from cartography.intel.aws.iam import sync_root_principal
 from cartography.util import timeit
 logger = logging.getLogger(__name__)
@@ -110,14 +111,6 @@ def load_aws_accounts(
     ON CREATE SET aa.firstseen = timestamp()
     SET aa.lastupdated = $aws_update_tag, aa.name = $ACCOUNT_NAME, aa.inscope=true
     REMOVE aa.foreign
-    WITH aa
-    MERGE (root:AWSPrincipal{arn: $RootArn})
-    ON CREATE SET root.firstseen = timestamp(), root.type = 'AWS'
-    SET root.lastupdated = $aws_update_tag
-    WITH aa, root
-    MERGE (aa)-[r:RESOURCE]->(root)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $aws_update_tag;
     """
     for account_name, account_id in aws_accounts.items():
         root_arn = f"arn:aws:iam::{account_id}:root"
@@ -128,6 +121,12 @@ def load_aws_accounts(
             RootArn=root_arn,
             aws_update_tag=aws_update_tag,
         )
+        # Every AWS account has a root principal
+        sync_root_principal(
+            neo4j_session,
+            account_id,
+            aws_update_tag,
+        )
 @timeit

cartography/intel/aws/permission_relationships.py CHANGED Viewed

@@ -12,6 +12,7 @@ import boto3
 import neo4j
 import yaml
+from cartography.client.core.tx import read_list_of_dicts_tx
 from cartography.graph.statement import GraphStatement
 from cartography.util import timeit
@@ -210,18 +211,6 @@ def calculate_permission_relationships(
     return allowed_mappings
-def parse_statement_node(node_group: List[Any]) -> List[Any]:
-    """Parse a dict from group of Neo4J node
-    Arguments:
-        node_group {[Neo4j.Node]} -- the node to parse
-    Returns:
-        [list] -- A list of statements from the node
-    """
-    return [n._properties for n in node_group]
 def compile_regex(item: str) -> Pattern:
     r"""Compile a clause into a regex. Clause checking in AWS is case insensitive
     The following regex symbols will be replaced to make AWS * and ? matching a regex
@@ -280,7 +269,8 @@ def get_principals_for_account(neo4j_session: neo4j.Session, account_id: str) ->
     RETURN
     DISTINCT principal.arn as principal_arn, policy.id as policy_id, collect(statements) as statements
     """
-    results = neo4j_session.run(
+    results = neo4j_session.execute_read(
+        read_list_of_dicts_tx,
         get_policy_query,
         AccountId=account_id,
     )
@@ -291,9 +281,7 @@ def get_principals_for_account(neo4j_session: neo4j.Session, account_id: str) ->
         statements = r["statements"]
         if principal_arn not in principals:
             principals[principal_arn] = {}
-        principals[principal_arn][policy_id] = compile_statement(
-            parse_statement_node(statements),
-        )
+        principals[principal_arn][policy_id] = compile_statement(statements)
     return principals

cartography/intel/azure/__init__.py CHANGED Viewed

@@ -7,8 +7,10 @@ import neo4j
 from cartography.config import Config
 from cartography.util import timeit
+from . import app_service
 from . import compute
 from . import cosmosdb
+from . import functions
 from . import sql
 from . import storage
 from . import subscription
@@ -40,6 +42,20 @@ def _sync_one_subscription(
         update_tag,
         common_job_parameters,
     )
+    app_service.sync(
+        neo4j_session,
+        credentials,
+        subscription_id,
+        update_tag,
+        common_job_parameters,
+    )
+    functions.sync(
+        neo4j_session,
+        credentials,
+        subscription_id,
+        update_tag,
+        common_job_parameters,
+    )
     sql.sync(
         neo4j_session,
         credentials.credential,

cartography/intel/azure/app_service.py ADDED Viewed

@@ -0,0 +1,105 @@
+import logging
+from typing import Any
+from typing import Dict
+from typing import List
+import neo4j
+from azure.core.exceptions import ClientAuthenticationError
+from azure.core.exceptions import HttpResponseError
+from azure.mgmt.web import WebSiteManagementClient
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.azure.app_service import AzureAppServiceSchema
+from cartography.util import timeit
+from .util.credentials import Credentials
+logger = logging.getLogger(__name__)
+@timeit
+def get_app_services(credentials: Credentials, subscription_id: str) -> List[Dict]:
+    """
+    Return every web app visible in the given Azure subscription as a list of dicts.
+    No filtering happens here: the same listing also contains Function Apps, and the
+    transform stage decides which entries are kept. Authentication and HTTP response
+    errors are logged as a warning and produce an empty list.
+    """
+    web_apps: List[Dict] = []
+    try:
+        web_client = WebSiteManagementClient(credentials.credential, subscription_id)
+        for site in web_client.web_apps.list():
+            web_apps.append(site.as_dict())
+    except (ClientAuthenticationError, HttpResponseError) as e:
+        logger.warning(
+            f"Failed to get app services for subscription {subscription_id}: {str(e)}"
+        )
+        return []
+    return web_apps
+@timeit
+def transform_app_services(app_services_response: List[Dict]) -> List[Dict]:
+    """
+    Transform the raw API response to the dictionary structure that the model expects.
+    Entries whose `kind` contains "functionapp" are dropped; every other entry is
+    reduced to the id, name, kind, location, state, default_host_name and https_only keys.
+    Keys missing from an entry come through as None.
+    """
+    transformed_apps: List[Dict[str, Any]] = []
+    for app in app_services_response:
+        # `kind` can be absent or explicitly None; coerce both to "" so the substring
+        # test cannot raise TypeError and such entries are kept as non-function apps.
+        kind = app.get("kind") or ""
+        if "functionapp" in kind:
+            continue
+        transformed_apps.append(
+            {
+                "id": app.get("id"),
+                "name": app.get("name"),
+                "kind": app.get("kind"),
+                "location": app.get("location"),
+                "state": app.get("state"),
+                "default_host_name": app.get("default_host_name"),
+                "https_only": app.get("https_only"),
+            }
+        )
+    return transformed_apps
+@timeit
+def load_app_services(
+    neo4j_session: neo4j.Session,
+    data: List[Dict[str, Any]],
+    subscription_id: str,
+    update_tag: int,
+) -> None:
+    """
+    Write the transformed App Service records to Neo4j using AzureAppServiceSchema.
+    The update tag is passed as `lastupdated` and the subscription ID as
+    `AZURE_SUBSCRIPTION_ID`.
+    """
+    schema = AzureAppServiceSchema()
+    load_kwargs = {
+        "lastupdated": update_tag,
+        "AZURE_SUBSCRIPTION_ID": subscription_id,
+    }
+    load(neo4j_session, schema, data, **load_kwargs)
+@timeit
+def cleanup_app_services(
+    neo4j_session: neo4j.Session, common_job_parameters: Dict
+) -> None:
+    """
+    Build the cleanup job derived from AzureAppServiceSchema and run it on the session.
+    """
+    cleanup_job = GraphJob.from_node_schema(
+        AzureAppServiceSchema(), common_job_parameters
+    )
+    cleanup_job.run(neo4j_session)
+@timeit
+def sync(
+    neo4j_session: neo4j.Session,
+    credentials: Credentials,
+    subscription_id: str,
+    update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Run the App Service pipeline for one subscription: fetch, transform, load, clean up.
+    """
+    logger.info(f"Syncing Azure App Services for subscription {subscription_id}.")
+    # NOTE(review): get_app_services() returns [] on API errors, so the load and
+    # cleanup steps still run after a failed fetch -- confirm this is intended.
+    app_services = transform_app_services(
+        get_app_services(credentials, subscription_id)
+    )
+    load_app_services(neo4j_session, app_services, subscription_id, update_tag)
+    cleanup_app_services(neo4j_session, common_job_parameters)

cartography/intel/azure/functions.py ADDED Viewed

@@ -0,0 +1,124 @@
+import logging
+from typing import Any
+from typing import Dict
+from typing import List
+import neo4j
+from azure.core.exceptions import ClientAuthenticationError
+from azure.core.exceptions import HttpResponseError
+from azure.mgmt.web import WebSiteManagementClient
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.azure.function_app import AzureFunctionAppSchema
+from cartography.util import timeit
+from .util.credentials import Credentials
+logger = logging.getLogger(__name__)
+@timeit
+def get_function_apps(credentials: Credentials, subscription_id: str) -> List[Dict]:
+    """
+    Return every web app visible in the given Azure subscription as a list of dicts.
+    Function Apps are a kind of web app, so the complete listing is returned and the
+    transform stage keeps only the function apps. Authentication failures and other
+    HTTP response errors are each logged as a warning and produce an empty list.
+    """
+    try:
+        web_client = WebSiteManagementClient(credentials.credential, subscription_id)
+        web_apps = [site.as_dict() for site in web_client.web_apps.list()]
+    except ClientAuthenticationError as e:
+        # Handled before HttpResponseError so credential problems get their own message.
+        auth_failure_msg = (
+            "Failed to authenticate to get function apps for subscription '%s'. "
+            "Please check your credentials. Error: %s"
+        )
+        logger.warning(auth_failure_msg, subscription_id, e)
+        return []
+    except HttpResponseError as e:
+        api_failure_msg = (
+            "Failed to get function apps for subscription '%s' due to an API error. "
+            "Status code: %s. Message: %s"
+        )
+        logger.warning(api_failure_msg, subscription_id, e.status_code, str(e))
+        return []
+    return web_apps
+@timeit
+def transform_function_apps(function_apps_response: List[Dict]) -> List[Dict]:
+    """
+    Transform the raw API response to the dictionary structure that the model expects.
+    Only entries whose `kind` contains "functionapp" are kept; each is reduced to the
+    id, name, kind, location, state, default_host_name and https_only keys.
+    Keys missing from an entry come through as None.
+    """
+    transformed_apps: List[Dict[str, Any]] = []
+    for app in function_apps_response:
+        # `kind` can be absent or explicitly None; coerce both to "" so the substring
+        # test cannot raise TypeError and such entries are simply not function apps.
+        kind = app.get("kind") or ""
+        # We only want to ingest resources that are explicitly function apps.
+        if "functionapp" not in kind:
+            continue
+        transformed_apps.append(
+            {
+                "id": app.get("id"),
+                "name": app.get("name"),
+                "kind": app.get("kind"),
+                "location": app.get("location"),
+                "state": app.get("state"),
+                "default_host_name": app.get("default_host_name"),
+                "https_only": app.get("https_only"),
+            }
+        )
+    return transformed_apps
+@timeit
+def load_function_apps(
+    neo4j_session: neo4j.Session,
+    data: List[Dict[str, Any]],
+    subscription_id: str,
+    update_tag: int,
+) -> None:
+    """
+    Write the transformed Function App records to Neo4j using AzureFunctionAppSchema.
+    The update tag is passed as `lastupdated` and the subscription ID as
+    `AZURE_SUBSCRIPTION_ID`.
+    """
+    schema = AzureFunctionAppSchema()
+    load_kwargs = {
+        "lastupdated": update_tag,
+        "AZURE_SUBSCRIPTION_ID": subscription_id,
+    }
+    load(neo4j_session, schema, data, **load_kwargs)
+@timeit
+def cleanup_function_apps(
+    neo4j_session: neo4j.Session, common_job_parameters: Dict
+) -> None:
+    """
+    Build the cleanup job derived from AzureFunctionAppSchema and run it on the session.
+    """
+    cleanup_job = GraphJob.from_node_schema(
+        AzureFunctionAppSchema(), common_job_parameters
+    )
+    cleanup_job.run(neo4j_session)
+@timeit
+def sync(
+    neo4j_session: neo4j.Session,
+    credentials: Credentials,
+    subscription_id: str,
+    update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Run the Function App pipeline for one subscription: fetch, transform, load, clean up.
+    """
+    logger.info(f"Syncing Azure Function Apps for subscription {subscription_id}.")
+    # NOTE(review): get_function_apps() returns [] on API errors, so the load and
+    # cleanup steps still run after a failed fetch -- confirm this is intended.
+    function_apps = transform_function_apps(
+        get_function_apps(credentials, subscription_id)
+    )
+    load_function_apps(neo4j_session, function_apps, subscription_id, update_tag)
+    cleanup_function_apps(neo4j_session, common_job_parameters)

cartography/intel/entra/__init__.py CHANGED Viewed

@@ -6,9 +6,12 @@ from azure.identity import ClientSecretCredential
 from msgraph import GraphServiceClient
 from cartography.config import Config
+from cartography.intel.entra.app_role_assignments import sync_app_role_assignments
 from cartography.intel.entra.applications import sync_entra_applications
+from cartography.intel.entra.federation.aws_identity_center import sync_entra_federation
 from cartography.intel.entra.groups import sync_entra_groups
 from cartography.intel.entra.ou import sync_entra_ous
+from cartography.intel.entra.service_principals import sync_service_principals
 from cartography.intel.entra.users import get_tenant
 from cartography.intel.entra.users import load_tenant
 from cartography.intel.entra.users import sync_entra_users
@@ -125,5 +128,33 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
             common_job_parameters,
         )
+        # Run service principals sync
+        await sync_service_principals(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )
+        # Run app role assignments sync
+        await sync_app_role_assignments(
+            neo4j_session,
+            config.entra_tenant_id,
+            config.entra_client_id,
+            config.entra_client_secret,
+            config.update_tag,
+            common_job_parameters,
+        )
+        # Run federation sync (after all resources are synced)
+        await sync_entra_federation(
+            neo4j_session,
+            config.update_tag,
+            config.entra_tenant_id,
+            common_job_parameters,
+        )
     # Execute syncs in sequence
     asyncio.run(main())

cartography/intel/entra/app_role_assignments.py ADDED Viewed

@@ -0,0 +1,277 @@
+import gc
+from typing import Any
+from typing import AsyncGenerator
+import neo4j
+from azure.identity import ClientSecretCredential
+from msgraph import GraphServiceClient
+from msgraph.generated.models.app_role_assignment_collection_response import (
+    AppRoleAssignmentCollectionResponse,
+)
+from cartography.client.core.tx import load
+from cartography.client.core.tx import read_list_of_values_tx
+from cartography.client.core.tx import read_single_value_tx
+from cartography.graph.job import GraphJob
+from cartography.intel.entra.applications import APP_ROLE_ASSIGNMENTS_PAGE_SIZE
+from cartography.intel.entra.applications import logger
+from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
+from cartography.util import timeit
+@timeit
+async def get_app_role_assignments_for_app(
+    client: GraphServiceClient, neo4j_session: neo4j.Session, app_id: str
+) -> AsyncGenerator[dict[str, Any], None]:
+    """
+    Gets app role assignments for a single application by querying the graph for service principal ID.
+    Assignments are yielded one at a time, page by page. Nothing is yielded when the graph
+    has no EntraServicePrincipal node for the application.
+    :param client: GraphServiceClient
+    :param neo4j_session: Neo4j session for querying service principal
+    :param app_id: Application ID
+    :return: Generator of app role assignment data as dicts
+    """
+    logger.info(f"Fetching role assignments for application: {app_id}")
+    # Query the graph to get the service principal ID for this application
+    query = """
+    MATCH (sp:EntraServicePrincipal {app_id: $app_id})
+    RETURN sp.id as service_principal_id
+    """
+    service_principal_id = neo4j_session.execute_read(
+        read_single_value_tx, query, app_id=app_id
+    )
+    if not service_principal_id:
+        logger.warning(
+            f"No service principal found in graph for application {app_id}. Continuing."
+        )
+        return
+    # Build the appRoleAssignedTo request builder once; it is reused for the request
+    # configuration, the first page and every follow-up page.
+    assigned_to_builder = client.service_principals.by_service_principal_id(
+        service_principal_id
+    ).app_role_assigned_to
+    # Request APP_ROLE_ASSIGNMENTS_PAGE_SIZE items per page to get more data per request.
+    # Memory is managed through streaming and batching, not page size
+    request_config = (
+        assigned_to_builder.AppRoleAssignedToRequestBuilderGetRequestConfiguration(
+            query_parameters=assigned_to_builder.AppRoleAssignedToRequestBuilderGetQueryParameters(
+                top=APP_ROLE_ASSIGNMENTS_PAGE_SIZE
+            )
+        )
+    )
+    assignments_page: AppRoleAssignmentCollectionResponse | None = (
+        await assigned_to_builder.get(request_configuration=request_config)
+    )
+    assignment_count = 0
+    page_count = 0
+    while assignments_page:
+        page_count += 1
+        if assignments_page.value:
+            page_valid_count = 0
+            page_skipped_count = 0
+            # Process assignments and immediately yield to avoid accumulation
+            for assignment in assignments_page.value:
+                # Only assignments that carry a principal_id are yielded; the rest are
+                # counted as skipped and reported in the per-page warning below.
+                if assignment.principal_id:
+                    assignment_count += 1
+                    page_valid_count += 1
+                    yield {
+                        "id": assignment.id,
+                        "app_role_id": assignment.app_role_id,
+                        "created_date_time": assignment.created_date_time,
+                        "principal_id": assignment.principal_id,
+                        "principal_display_name": assignment.principal_display_name,
+                        "principal_type": assignment.principal_type,
+                        "resource_display_name": assignment.resource_display_name,
+                        "resource_id": assignment.resource_id,
+                        "application_app_id": app_id,
+                    }
+                else:
+                    page_skipped_count += 1
+            # Log page results with details about skipped objects
+            if page_skipped_count > 0:
+                logger.warning(
+                    f"Page {page_count} for {app_id}: {page_valid_count} valid assignments, "
+                    f"{page_skipped_count} skipped objects. Total valid: {assignment_count}"
+                )
+            else:
+                logger.debug(
+                    f"Page {page_count} for {app_id}: {page_valid_count} assignments. "
+                    f"Total: {assignment_count}"
+                )
+            # Force garbage collection after each page
+            gc.collect()
+        # Check if we have more pages to fetch
+        if not assignments_page.odata_next_link:
+            break
+        # Clear previous page before fetching next
+        assignments_page.value = None
+        # Fetch next page
+        logger.debug(f"Fetching page {page_count + 1} of assignments for {app_id}")
+        next_page_url = assignments_page.odata_next_link
+        # Follow the next link through the appRoleAssignedTo builder (not the
+        # service-principals collection builder) so the page is requested the same way
+        # as the first one and comes back as an app role assignment collection.
+        assignments_page = await assigned_to_builder.with_url(next_page_url).get()
+    logger.info(
+        f"Successfully retrieved {assignment_count} assignments for application {app_id} (pages: {page_count})"
+    )
+def transform_app_role_assignments(
+    assignments: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """
+    Reshape raw app role assignment dicts into the rows that are loaded into the graph.
+    The app_role_id, principal_id and resource_id values are converted to strings when
+    truthy and to None otherwise; every other field is copied through unchanged.
+    :param assignments: Raw app role assignment data as dicts
+    :return: Transformed assignment data for graph loading
+    """
+    # (field name, stringify?) pairs, listed in the order the output keys are emitted.
+    field_specs = (
+        ("id", False),
+        ("app_role_id", True),
+        ("created_date_time", False),
+        ("principal_id", True),
+        ("principal_display_name", False),
+        ("principal_type", False),
+        ("resource_display_name", False),
+        ("resource_id", True),
+        ("application_app_id", False),
+    )
+    def _convert(value: Any, stringify: bool) -> Any:
+        # Pass-through fields keep their value; stringified fields become str or None.
+        if not stringify:
+            return value
+        return str(value) if value else None
+    return [
+        {name: _convert(raw[name], stringify) for name, stringify in field_specs}
+        for raw in assignments
+    ]
+@timeit
+def load_app_role_assignments(
+    neo4j_session: neo4j.Session,
+    assignments_data: list[dict[str, Any]],
+    update_tag: int,
+    tenant_id: str,
+) -> None:
+    """
+    Write Entra app role assignment rows to the graph using EntraAppRoleAssignmentSchema.
+    :param neo4j_session: Neo4j session
+    :param assignments_data: Assignment data to load
+    :param update_tag: Update tag for tracking data freshness, passed as `lastupdated`
+    :param tenant_id: Entra tenant ID, passed as `TENANT_ID`
+    """
+    schema = EntraAppRoleAssignmentSchema()
+    load_kwargs = {
+        "lastupdated": update_tag,
+        "TENANT_ID": tenant_id,
+    }
+    load(neo4j_session, schema, assignments_data, **load_kwargs)
+@timeit
+def cleanup_app_role_assignments(
+    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
+) -> None:
+    """
+    Run the cleanup job derived from EntraAppRoleAssignmentSchema, which is meant to remove
+    app role assignments and their relationships that were not updated in the last sync.
+    :param neo4j_session: Neo4j session
+    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
+    """
+    cleanup_job = GraphJob.from_node_schema(
+        EntraAppRoleAssignmentSchema(), common_job_parameters
+    )
+    cleanup_job.run(neo4j_session)
+@timeit
+async def sync_app_role_assignments(
+    neo4j_session: neo4j.Session,
+    tenant_id: str,
+    client_id: str,
+    client_secret: str,
+    update_tag: int,
+    common_job_parameters: dict[str, Any],
+) -> None:
+    """
+    Sync Entra app role assignments to the graph.
+    The app IDs are read from the EntraApplication nodes already in the graph; for each
+    one the assignments are streamed from Microsoft Graph, transformed and loaded in
+    batches, and the cleanup job runs once at the end.
+    :param neo4j_session: Neo4j session
+    :param tenant_id: Entra tenant ID
+    :param client_id: Azure application client ID
+    :param client_secret: Azure application client secret
+    :param update_tag: Update tag for tracking data freshness
+    :param common_job_parameters: Common job parameters for cleanup
+    """
+    # Create credentials and client
+    credential = ClientSecretCredential(
+        tenant_id=tenant_id,
+        client_id=client_id,
+        client_secret=client_secret,
+    )
+    client = GraphServiceClient(
+        credential,
+        scopes=["https://graph.microsoft.com/.default"],
+    )
+    assignment_batch_size = 200  # Batch size for assignments
+    # Accumulates raw assignments across applications until a full batch is reached
+    assignments_batch = []
+    total_assignment_count = 0
+    # Get app_ids from graph instead of streaming from API again
+    # NOTE(review): this relies on EntraApplication nodes having been loaded earlier in
+    # the same run -- confirm against the caller's sync order.
+    query = "MATCH (app:EntraApplication) RETURN app.app_id"
+    app_ids = neo4j_session.execute_read(read_list_of_values_tx, query)
+    for app_id in app_ids:
+        # Stream app role assignments (now using graph query for service principal ID)
+        async for assignment in get_app_role_assignments_for_app(
+            client, neo4j_session, app_id
+        ):
+            assignments_batch.append(assignment)
+            total_assignment_count += 1
+            # Transform and load assignments in batches
+            if len(assignments_batch) >= assignment_batch_size:
+                transformed_assignments = transform_app_role_assignments(
+                    assignments_batch
+                )
+                load_app_role_assignments(
+                    neo4j_session, transformed_assignments, update_tag, tenant_id
+                )
+                logger.debug(f"Loaded batch of {len(assignments_batch)} assignments")
+                # Empty both lists in place so the next batch starts from zero
+                assignments_batch.clear()
+                transformed_assignments.clear()
+                # Force garbage collection after batch load
+                gc.collect()
+    # Process remaining assignments
+    if assignments_batch:
+        transformed_assignments = transform_app_role_assignments(assignments_batch)
+        load_app_role_assignments(
+            neo4j_session, transformed_assignments, update_tag, tenant_id
+        )
+        assignments_batch.clear()
+        transformed_assignments.clear()
+    # Cleanup runs once, after every application's assignments have been loaded
+    cleanup_app_role_assignments(neo4j_session, common_job_parameters)
+    logger.info(f"Completed syncing {total_assignment_count} app role assignments")
+    # Final garbage collection
+    gc.collect()

cartography 0.113.0__py3-none-any.whl → 0.114.0__py3-none-any.whl

Potentially problematic release.

cartography 0.113.0 (py3-none-any.whl) → 0.114.0 (py3-none-any.whl)