cartography 0.94.0rc3__py3-none-any.whl → 0.95.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic.
- cartography/cli.py +42 -24
- cartography/config.py +12 -8
- cartography/data/indexes.cypher +0 -2
- cartography/driftdetect/cli.py +1 -1
- cartography/graph/job.py +8 -1
- cartography/intel/aws/permission_relationships.py +6 -2
- cartography/intel/gcp/__init__.py +110 -23
- cartography/intel/kandji/__init__.py +1 -1
- cartography/intel/semgrep/__init__.py +9 -2
- cartography/intel/semgrep/dependencies.py +201 -0
- cartography/intel/semgrep/deployment.py +67 -0
- cartography/intel/semgrep/findings.py +22 -53
- cartography/intel/snipeit/__init__.py +30 -0
- cartography/intel/snipeit/asset.py +74 -0
- cartography/intel/snipeit/user.py +75 -0
- cartography/intel/snipeit/util.py +35 -0
- cartography/models/semgrep/dependencies.py +77 -0
- cartography/models/snipeit/__init__.py +0 -0
- cartography/models/snipeit/asset.py +81 -0
- cartography/models/snipeit/tenant.py +17 -0
- cartography/models/snipeit/user.py +49 -0
- cartography/sync.py +2 -2
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/LICENSE +1 -1
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/METADATA +3 -5
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/RECORD +28 -21
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/WHEEL +1 -1
- cartography/data/jobs/cleanup/crxcavator_import_cleanup.json +0 -18
- cartography/intel/crxcavator/__init__.py +0 -44
- cartography/intel/crxcavator/crxcavator.py +0 -329
- cartography-0.94.0rc3.dist-info/NOTICE +0 -4
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/top_level.txt +0 -0
cartography/cli.py
CHANGED
@@ -220,23 +220,6 @@ class CLI:
                 ' If not specified, cartography by default will run all AWS sync modules available.'
             ),
         )
-        parser.add_argument(
-            '--crxcavator-api-base-uri',
-            type=str,
-            default='https://api.crxcavator.io/v1',
-            help=(
-                'Base URI for the CRXcavator API. Defaults to public API endpoint.'
-            ),
-        )
-        parser.add_argument(
-            '--crxcavator-api-key-env-var',
-            type=str,
-            default=None,
-            help=(
-                'The name of an environment variable containing a key with which to auth to the CRXcavator API. '
-                'Required if you are using the CRXcavator intel module. Ignored otherwise.'
-            ),
-        )
         parser.add_argument(
             '--analysis-job-directory',
             type=str,

@@ -541,6 +524,28 @@ class CLI:
                 'Required if you are using the Semgrep intel module. Ignored otherwise.'
             ),
         )
+        parser.add_argument(
+            '--snipeit-base-uri',
+            type=str,
+            default=None,
+            help=(
+                'Your SnipeIT base URI'
+                'Required if you are using the SnipeIT intel module. Ignored otherwise.'
+            ),
+        )
+        parser.add_argument(
+            '--snipeit-token-env-var',
+            type=str,
+            default=None,
+            help='The name of an environment variable containing token with which to authenticate to SnipeIT.',
+        )
+        parser.add_argument(
+            '--snipeit-tenant-id',
+            type=str,
+            default=None,
+            help='An ID for the SnipeIT tenant.',
+        )
+
         return parser

     def main(self, argv: str) -> int:

@@ -604,13 +609,6 @@ class CLI:
         else:
             config.okta_api_key = None

-        # CRXcavator config
-        if config.crxcavator_api_base_uri and config.crxcavator_api_key_env_var:
-            logger.debug(f"Reading API key for CRXcavator from env variable {config.crxcavator_api_key_env_var}.")
-            config.crxcavator_api_key = os.environ.get(config.crxcavator_api_key_env_var)
-        else:
-            config.crxcavator_api_key = None
-
         # GitHub config
         if config.github_config_env_var:
             logger.debug(f"Reading config string for GitHub from environment variable {config.github_config_env_var}")

@@ -744,6 +742,26 @@ class CLI:
         else:
             config.cve_api_key = None

+        # SnipeIT config
+        if config.snipeit_base_uri:
+            if config.snipeit_token_env_var:
+                logger.debug(
+                    "Reading SnipeIT API token from environment variable '%s'.",
+                    config.snipeit_token_env_var,
+                )
+                config.snipeit_token = os.environ.get(config.snipeit_token_env_var)
+            elif os.environ.get('SNIPEIT_TOKEN'):
+                logger.debug(
+                    "Reading SnipeIT API token from environment variable 'SNIPEIT_TOKEN'.",
+                )
+                config.snipeit_token = os.environ.get('SNIPEIT_TOKEN')
+            else:
+                logger.warning("A SnipeIT base URI was provided but a token was not.")
+                config.kandji_token = None
+            else:
+        else:
+            logger.warning("A SnipeIT base URI was not provided.")
+            config.snipeit_base_uri = None
+
         # Run cartography
         try:
             return cartography.sync.run_with_config(self.sync, config)
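A note on the SnipeIT block above: token resolution prefers the environment variable named by --snipeit-token-env-var and falls back to SNIPEIT_TOKEN; also, the no-token branch assigns config.kandji_token = None, which reads like a carry-over from the similar Kandji block. A minimal standalone sketch of the intended lookup order (not cartography's API):

import os
from typing import Optional


def resolve_snipeit_token(token_env_var: Optional[str]) -> Optional[str]:
    # Prefer the user-supplied variable name, then the SNIPEIT_TOKEN default.
    if token_env_var:
        return os.environ.get(token_env_var)
    return os.environ.get('SNIPEIT_TOKEN')


# e.g. with SNIPEIT_TOKEN=abc123 set: resolve_snipeit_token(None) == 'abc123'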
cartography/config.py
CHANGED
@@ -43,10 +43,6 @@ class Config:
     :param azure_client_secret: Client Secret for connecting in a Service Principal Authentication approach. Optional.
     :type aws_requested_syncs: str
     :param aws_requested_syncs: Comma-separated list of AWS resources to sync. Optional.
-    :type crxcavator_api_base_uri: str
-    :param crxcavator_api_base_uri: URI for CRXcavator API. Optional.
-    :type crxcavator_api_key: str
-    :param crxcavator_api_key: Auth key for CRXcavator API. Optional.
     :type analysis_job_directory: str
     :param analysis_job_directory: Path to a directory tree containing analysis jobs to run. Optional.
     :type oci_sync_all_profiles: bool

@@ -111,6 +107,12 @@ class Config:
     :param duo_api_hostname: The Duo api hostname, e.g. "api-abc123.duosecurity.com". Optional.
     :param semgrep_app_token: The Semgrep api token. Optional.
     :type semgrep_app_token: str
+    :type snipeit_base_uri: string
+    :param snipeit_base_uri: SnipeIT data provider base URI. Optional.
+    :type snipeit_token: string
+    :param snipeit_token: Token used to authenticate to the SnipeIT data provider. Optional.
+    :type snipeit_tenant_id: string
+    :param snipeit_tenant_id: Token used to authenticate to the SnipeIT data provider. Optional.
     """

     def __init__(

@@ -131,8 +133,6 @@ class Config:
         azure_client_secret=None,
         aws_requested_syncs=None,
         analysis_job_directory=None,
-        crxcavator_api_base_uri=None,
-        crxcavator_api_key=None,
         oci_sync_all_profiles=None,
         okta_org_id=None,
         okta_api_key=None,

@@ -170,6 +170,9 @@ class Config:
         duo_api_secret=None,
         duo_api_hostname=None,
         semgrep_app_token=None,
+        snipeit_base_uri=None,
+        snipeit_token=None,
+        snipeit_tenant_id=None,
     ):
         self.neo4j_uri = neo4j_uri
         self.neo4j_user = neo4j_user

@@ -187,8 +190,6 @@ class Config:
         self.azure_client_secret = azure_client_secret
         self.aws_requested_syncs = aws_requested_syncs
         self.analysis_job_directory = analysis_job_directory
-        self.crxcavator_api_base_uri = crxcavator_api_base_uri
-        self.crxcavator_api_key = crxcavator_api_key
         self.oci_sync_all_profiles = oci_sync_all_profiles
         self.okta_org_id = okta_org_id
         self.okta_api_key = okta_api_key

@@ -226,3 +227,6 @@ class Config:
         self.duo_api_secret = duo_api_secret
         self.duo_api_hostname = duo_api_hostname
         self.semgrep_app_token = semgrep_app_token
+        self.snipeit_base_uri = snipeit_base_uri
+        self.snipeit_token = snipeit_token
+        self.snipeit_tenant_id = snipeit_tenant_id
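With the additions above, the new settings can be passed as keyword arguments when constructing a Config object directly. A minimal sketch (all values are placeholders):

from cartography.config import Config

config = Config(
    neo4j_uri='bolt://localhost:7687',               # placeholder Neo4j endpoint
    snipeit_base_uri='https://snipeit.example.com',  # placeholder tenant URL
    snipeit_token='REDACTED',                        # placeholder token
    snipeit_tenant_id='example-tenant',              # placeholder tenant ID
)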
cartography/data/indexes.cypher
CHANGED
@@ -65,8 +65,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:AccountAccessKey) ON (n.accesskeyid);
 CREATE INDEX IF NOT EXISTS FOR (n:AccountAccessKey) ON (n.lastupdated);
 CREATE INDEX IF NOT EXISTS FOR (n:AutoScalingGroup) ON (n.arn);
 CREATE INDEX IF NOT EXISTS FOR (n:AutoScalingGroup) ON (n.lastupdated);
-CREATE INDEX IF NOT EXISTS FOR (n:ChromeExtension) ON (n.id);
-CREATE INDEX IF NOT EXISTS FOR (n:ChromeExtension) ON (n.lastupdated);
 CREATE INDEX IF NOT EXISTS FOR (n:CrowdstrikeHost) ON (n.id);
 CREATE INDEX IF NOT EXISTS FOR (n:CrowdstrikeHost) ON (n.instance_id);
 CREATE INDEX IF NOT EXISTS FOR (n:CrowdstrikeHost) ON (n.lastupdated);
cartography/driftdetect/cli.py
CHANGED
@@ -30,7 +30,7 @@ class CLI:
                 'graph database and reports the deviations.'
             ),
             epilog='For more documentation please visit: '
-                   'https://github.
+                   'https://cartography-cncf.github.io/cartography/usage/drift-detect.html',
         )
         parser.add_argument(
             '-v',
cartography/graph/job.py
CHANGED
@@ -150,7 +150,14 @@ class GraphJob:
         )

         statements: List[GraphStatement] = [
-            GraphStatement(
+            GraphStatement(
+                query,
+                parameters=parameters,
+                iterative=True,
+                iterationsize=100,
+                parent_job_name=node_schema.label,
+                parent_job_sequence_num=idx,
+            ) for idx, query in enumerate(queries, start=1)
         ]

         return cls(

@@ -322,8 +322,12 @@ def cleanup_rpr(
    )

    statement = GraphStatement(
-        cleanup_rpr_query_template,
-
+        cleanup_rpr_query_template,
+        {'UPDATE_TAG': update_tag, 'AWS_ID': current_aws_id},
+        True,
+        1000,
+        parent_job_name=f"{relationship_name}:{node_label}",
+        parent_job_sequence_num=1,
    )
    statement.run(neo4j_session)
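Both hunks above thread parent_job_name and parent_job_sequence_num through GraphStatement so each statement can be attributed to its job and its position within it; enumerate(queries, start=1) makes the sequence 1-based. A toy illustration of that numbering (plain dicts stand in for GraphStatement):

queries = ['query-a', 'query-b']  # placeholder cleanup queries
statements = [
    {'query': q, 'parent_job_sequence_num': idx}
    for idx, q in enumerate(queries, start=1)
]
assert statements[0]['parent_job_sequence_num'] == 1
assert statements[1]['parent_job_sequence_num'] == 2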
cartography/intel/gcp/__init__.py
CHANGED

@@ -120,11 +120,11 @@ def _initialize_resources(credentials: GoogleCredentials) -> Resource:
     return Resources(
         crm_v1=_get_crm_resource_v1(credentials),
         crm_v2=_get_crm_resource_v2(credentials),
-        compute=_get_compute_resource(credentials),
-        storage=_get_storage_resource(credentials),
-        container=_get_container_resource(credentials),
         serviceusage=_get_serviceusage_resource(credentials),
-
+        compute=None,
+        container=None,
+        dns=None,
+        storage=None,
     )

@@ -159,12 +159,12 @@ def _services_enabled_on_project(serviceusage: Resource, project_id: str) -> Set
         return set()


-def _sync_single_project(
+def _sync_single_project_compute(
     neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
     common_job_parameters: Dict,
 ) -> None:
     """
-    Handles graph sync for a single GCP project.
+    Handles graph sync for a single GCP project on Compute resources.
     :param neo4j_session: The Neo4j session
     :param resources: namedtuple of the GCP resource objects
     :param project_id: The project ID number to sync. See the `projectId` field in

@@ -175,14 +175,72 @@ def _sync_single_project(
     """
     # Determine the resources available on the project.
     enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
+    compute_cred = _get_compute_resource(get_gcp_credentials())
     if service_names.compute in enabled_services:
-        compute.sync(neo4j_session,
+        compute.sync(neo4j_session, compute_cred, project_id, gcp_update_tag, common_job_parameters)
+
+
+def _sync_single_project_storage(
+    neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Handles graph sync for a single GCP project on Storage resources.
+    :param neo4j_session: The Neo4j session
+    :param resources: namedtuple of the GCP resource objects
+    :param project_id: The project ID number to sync. See the `projectId` field in
+    https://cloud.google.com/resource-manager/reference/rest/v1/projects
+    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
+    :param common_job_parameters: Other parameters sent to Neo4j
+    :return: Nothing
+    """
+    # Determine the resources available on the project.
+    enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
+    storage_cred = _get_storage_resource(get_gcp_credentials())
     if service_names.storage in enabled_services:
-        storage.sync_gcp_buckets(neo4j_session,
+        storage.sync_gcp_buckets(neo4j_session, storage_cred, project_id, gcp_update_tag, common_job_parameters)
+
+
+def _sync_single_project_gke(
+    neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Handles graph sync for a single GCP project GKE resources.
+    :param neo4j_session: The Neo4j session
+    :param resources: namedtuple of the GCP resource objects
+    :param project_id: The project ID number to sync. See the `projectId` field in
+    https://cloud.google.com/resource-manager/reference/rest/v1/projects
+    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
+    :param common_job_parameters: Other parameters sent to Neo4j
+    :return: Nothing
+    """
+    # Determine the resources available on the project.
+    enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
+    container_cred = _get_container_resource(get_gcp_credentials())
     if service_names.gke in enabled_services:
-        gke.sync_gke_clusters(neo4j_session,
+        gke.sync_gke_clusters(neo4j_session, container_cred, project_id, gcp_update_tag, common_job_parameters)
+
+
+def _sync_single_project_dns(
+    neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Handles graph sync for a single GCP project DNS resources.
+    :param neo4j_session: The Neo4j session
+    :param resources: namedtuple of the GCP resource objects
+    :param project_id: The project ID number to sync. See the `projectId` field in
+    https://cloud.google.com/resource-manager/reference/rest/v1/projects
+    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
+    :param common_job_parameters: Other parameters sent to Neo4j
+    :return: Nothing
+    """
+    # Determine the resources available on the project.
+    enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
+    dns_cred = _get_dns_resource(get_gcp_credentials())
     if service_names.dns in enabled_services:
-        dns.sync(neo4j_session,
+        dns.sync(neo4j_session, dns_cred, project_id, gcp_update_tag, common_job_parameters)


 def _sync_multiple_projects(

@@ -203,26 +261,38 @@ def _sync_multiple_projects(
     """
     logger.info("Syncing %d GCP projects.", len(projects))
     crm.sync_gcp_projects(neo4j_session, projects, gcp_update_tag, common_job_parameters)
+    # Compute data sync
+    for project in projects:
+        project_id = project['projectId']
+        logger.info("Syncing GCP project %s for Compute.", project_id)
+        _sync_single_project_compute(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)

+    # Storage data sync
     for project in projects:
         project_id = project['projectId']
-        logger.info("Syncing GCP project %s
-
+        logger.info("Syncing GCP project %s for Storage", project_id)
+        _sync_single_project_storage(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)
+
+    # GKE data sync
+    for project in projects:
+        project_id = project['projectId']
+        logger.info("Syncing GCP project %s for GKE", project_id)
+        _sync_single_project_gke(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)
+
+    # DNS data sync
+    for project in projects:
+        project_id = project['projectId']
+        logger.info("Syncing GCP project %s for DNS", project_id)
+        _sync_single_project_dns(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)


 @timeit
-def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
+def get_gcp_credentials() -> GoogleCredentials:
     """
-    Starts the GCP ingestion process by initializing Google Application Default Credentials, creating the necessary
-    resource objects, listing all GCP organizations and projects available to the GCP identity, and supplying that
-    context to all intel modules.
-    :param neo4j_session: The Neo4j session
-    :param config: A `cartography.config` object
-    :return: Nothing
+    Gets access tokens for GCP API access.
+    :param: None
+    :return: GoogleCredentials
     """
-    common_job_parameters = {
-        "UPDATE_TAG": config.update_tag,
-    }
     try:
         # Explicitly use Application Default Credentials.
         # See https://oauth2client.readthedocs.io/en/latest/source/

@@ -239,7 +309,24 @@ def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
             ),
             e,
         )
-        return
+    return credentials
+
+
+@timeit
+def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
+    """
+    Starts the GCP ingestion process by initializing Google Application Default Credentials, creating the necessary
+    resource objects, listing all GCP organizations and projects available to the GCP identity, and supplying that
+    context to all intel modules.
+    :param neo4j_session: The Neo4j session
+    :param config: A `cartography.config` object
+    :return: Nothing
+    """
+    common_job_parameters = {
+        "UPDATE_TAG": config.update_tag,
+    }
+
+    credentials = get_gcp_credentials()

     resources = _initialize_resources(credentials)
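The restructuring above splits _sync_single_project into one function per service, each building its own API client from freshly obtained credentials, and _sync_multiple_projects now makes one pass over all projects per service instead of one pass per project. A toy mirror of that control flow (not cartography's API; the sync functions here are no-ops):

from typing import Callable, Dict, List


def sync_all(projects: List[str], service_syncs: Dict[str, Callable[[str], None]]) -> None:
    # One pass over every project per service, mirroring _sync_multiple_projects.
    for service, sync_fn in service_syncs.items():
        for project_id in projects:
            print(f"Syncing GCP project {project_id} for {service}")
            sync_fn(project_id)


sync_all(
    ['project-a', 'project-b'],
    {'Compute': lambda p: None, 'Storage': lambda p: None},
)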
cartography/intel/kandji/__init__.py
CHANGED

@@ -21,7 +21,7 @@ def start_kandji_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
     """
     if config.kandji_base_uri is None or config.kandji_token is None or config.kandji_tenant_id is None:
         logger.warning(
-            'Required parameter
+            'Required parameter missing. Skipping sync. '
             'See docs to configure.',
         )
         return
cartography/intel/semgrep/__init__.py
CHANGED

@@ -3,7 +3,9 @@ import logging
 import neo4j

 from cartography.config import Config
-from cartography.intel.semgrep.
+from cartography.intel.semgrep.dependencies import sync_dependencies
+from cartography.intel.semgrep.deployment import sync_deployment
+from cartography.intel.semgrep.findings import sync_findings
 from cartography.util import timeit

@@ -20,4 +22,9 @@ def start_semgrep_ingestion(
     if not config.semgrep_app_token:
         logger.info('Semgrep import is not configured - skipping this module. See docs to configure.')
         return
-
+
+    # sync_deployment must be called first since it populates common_job_parameters
+    # with the deployment ID and slug, which are required by the other sync functions
+    sync_deployment(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
+    sync_dependencies(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
+    sync_findings(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
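The ordering comment above is load-bearing: sync_dependencies (next file) returns early when common_job_parameters has no DEPLOYMENT_ID. A toy sketch of the contract, assuming sync_deployment stores the deployment ID in the shared dict as the comment says (the key name DEPLOYMENT_ID is confirmed by dependencies.py below; the value here is hypothetical):

from typing import Any, Dict


def fake_sync_deployment(common_job_parameters: Dict[str, Any]) -> None:
    # Stand-in for sync_deployment: record the deployment ID for later stages.
    common_job_parameters['DEPLOYMENT_ID'] = 'dep_123'  # hypothetical ID


params: Dict[str, Any] = {'UPDATE_TAG': 1700000000}
fake_sync_deployment(params)
assert params['DEPLOYMENT_ID'] == 'dep_123'  # dependencies/findings may now run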
cartography/intel/semgrep/dependencies.py
ADDED

@@ -0,0 +1,201 @@
+import logging
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+
+import neo4j
+import requests
+from requests.exceptions import HTTPError
+from requests.exceptions import ReadTimeout
+
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.semgrep.dependencies import SemgrepGoLibrarySchema
+from cartography.stats import get_stats_client
+from cartography.util import merge_module_sync_metadata
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+stat_handler = get_stats_client(__name__)
+_PAGE_SIZE = 10000
+_TIMEOUT = (60, 60)
+_MAX_RETRIES = 3
+
+
+@timeit
+def get_dependencies(semgrep_app_token: str, deployment_id: str, ecosystems: List[str]) -> List[Dict[str, Any]]:
+    """
+    Gets all dependencies for the given ecosystems within the given Semgrep deployment ID.
+    param: semgrep_app_token: The Semgrep App token to use for authentication.
+    param: deployment_id: The Semgrep deployment ID to use for retrieving dependencies.
+    param: ecosystems: One or more ecosystems to import dependencies from, e.g. "gomod" or "pypi".
+    The list of supported ecosystems is defined here:
+    https://semgrep.dev/api/v1/docs/#tag/SupplyChainService/operation/semgrep_app.products.sca.handlers.dependency.list_dependencies_conexxion
+    """
+    all_deps = []
+    deps_url = f"https://semgrep.dev/api/v1/deployments/{deployment_id}/dependencies"
+    has_more = True
+    page = 0
+    retries = 0
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {semgrep_app_token}",
+    }
+
+    request_data: dict[str, Any] = {
+        "pageSize": _PAGE_SIZE,
+        "dependencyFilter": {
+            "ecosystem": ecosystems,
+        },
+    }
+
+    logger.info(f"Retrieving Semgrep dependencies for deployment '{deployment_id}'.")
+    while has_more:
+        try:
+            response = requests.post(deps_url, json=request_data, headers=headers, timeout=_TIMEOUT)
+            response.raise_for_status()
+            data = response.json()
+        except (ReadTimeout, HTTPError):
+            logger.warning(f"Failed to retrieve Semgrep dependencies for page {page}. Retrying...")
+            retries += 1
+            if retries >= _MAX_RETRIES:
+                raise
+            continue
+        deps = data.get("dependencies", [])
+        has_more = data.get("hasMore", False)
+        logger.info(f"Processed page {page} of Semgrep dependencies.")
+        all_deps.extend(deps)
+        retries = 0
+        page += 1
+        request_data["cursor"] = data.get("cursor")
+
+    logger.info(f"Retrieved {len(all_deps)} Semgrep dependencies in {page} pages.")
+    return all_deps
+
+
+def transform_dependencies(raw_deps: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Transforms the raw dependencies response from Semgrep API into a list of dicts
+    that can be used to create the Dependency nodes.
+    """
+
+    """
+    sample raw_dep as of November 2024:
+    {
+        "repositoryId": "123456",
+        "definedAt": {
+            "path": "go.mod",
+            "startLine": "6",
+            "endLine": "6",
+            "url": "https://github.com/org/repo-name/blob/00000000000000000000000000000000/go.mod#L6",
+            "committedAt": "1970-01-01T00:00:00Z",
+            "startCol": "0",
+            "endCol": "0"
+        },
+        "transitivity": "DIRECT",
+        "package": {
+            "name": "github.com/foo/bar",
+            "versionSpecifier": "1.2.3"
+        },
+        "ecosystem": "gomod",
+        "licenses": [],
+        "pathToTransitivity": []
+    },
+    """
+    deps = []
+    for raw_dep in raw_deps:
+
+        # We could call a different endpoint to get all repo IDs and store a mapping of repo ID to URL,
+        # but it's much simpler to just extract the URL from the definedAt field.
+        repo_url = raw_dep["definedAt"]["url"].split("/blob/", 1)[0]
+
+        name = raw_dep["package"]["name"]
+        version = raw_dep["package"]["versionSpecifier"]
+        id = f"{name}|{version}"
+
+        # As of November 2024, Semgrep does not import dependencies with version specifiers such as >, <, etc.
+        # For now, hardcode the specifier to ==<version> to align with GitHub-sourced Python dependencies.
+        # If Semgrep eventually supports version specifiers, update this line accordingly.
+        specifier = f"=={version}"
+
+        deps.append({
+            # existing dependency properties:
+            "id": id,
+            "name": name,
+            "specifier": specifier,
+            "version": version,
+            "repo_url": repo_url,
+
+            # Semgrep-specific properties:
+            "ecosystem": raw_dep["ecosystem"],
+            "transitivity": raw_dep["transitivity"].lower(),
+            "url": raw_dep["definedAt"]["url"],
+        })
+
+    return deps
+
+
+@timeit
+def load_dependencies(
+    neo4j_session: neo4j.Session,
+    dependency_schema: Callable,
+    dependencies: List[Dict],
+    deployment_id: str,
+    update_tag: int,
+) -> None:
+    logger.info(f"Loading {len(dependencies)} {dependency_schema().label} objects into the graph.")
+    load(
+        neo4j_session,
+        dependency_schema(),
+        dependencies,
+        lastupdated=update_tag,
+        DEPLOYMENT_ID=deployment_id,
+    )
+
+
+@timeit
+def cleanup(
+    neo4j_session: neo4j.Session,
+    common_job_parameters: Dict[str, Any],
+) -> None:
+    logger.info("Running Semgrep Go Library cleanup job.")
+    go_libraries_cleanup_job = GraphJob.from_node_schema(
+        SemgrepGoLibrarySchema(), common_job_parameters,
+    )
+    go_libraries_cleanup_job.run(neo4j_session)
+
+
+@timeit
+def sync_dependencies(
+    neo4j_session: neo4j.Session,
+    semgrep_app_token: str,
+    update_tag: int,
+    common_job_parameters: Dict[str, Any],
+) -> None:
+
+    deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
+    if not deployment_id:
+        logger.warning(
+            "Missing Semgrep deployment ID, ensure that sync_deployment() has been called."
+            "Skipping Semgrep dependencies sync job.",
+        )
+        return
+
+    logger.info("Running Semgrep dependencies sync job.")
+
+    # fetch and load dependencies for the Go ecosystem
+    raw_go_deps = get_dependencies(semgrep_app_token, deployment_id, ecosystems=["gomod"])
+    go_deps = transform_dependencies(raw_go_deps)
+    load_dependencies(neo4j_session, SemgrepGoLibrarySchema, go_deps, deployment_id, update_tag)
+
+    cleanup(neo4j_session, common_job_parameters)
+
+    merge_module_sync_metadata(
+        neo4j_session=neo4j_session,
+        group_type='Semgrep',
+        group_id=deployment_id,
+        synced_type='SemgrepDependency',
+        update_tag=update_tag,
+        stat_handler=stat_handler,
+    )