cartography 0.104.0rc2__py3-none-any.whl → 0.123.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartography/_version.py +16 -3
- cartography/cli.py +466 -5
- cartography/client/aws/__init__.py +19 -0
- cartography/client/aws/ecr.py +51 -0
- cartography/client/core/tx.py +357 -8
- cartography/config.py +153 -0
- cartography/data/azure_permission_relationships.yaml +20 -0
- cartography/data/gcp_permission_relationships.yaml +21 -0
- cartography/data/indexes.cypher +0 -186
- cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
- cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
- cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
- cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
- cartography/driftdetect/cli.py +3 -2
- cartography/graph/cleanupbuilder.py +198 -41
- cartography/graph/job.py +54 -6
- cartography/graph/querybuilder.py +528 -27
- cartography/graph/statement.py +5 -1
- cartography/intel/airbyte/__init__.py +105 -0
- cartography/intel/airbyte/connections.py +120 -0
- cartography/intel/airbyte/destinations.py +81 -0
- cartography/intel/airbyte/organizations.py +59 -0
- cartography/intel/airbyte/sources.py +78 -0
- cartography/intel/airbyte/tags.py +64 -0
- cartography/intel/airbyte/users.py +106 -0
- cartography/intel/airbyte/util.py +122 -0
- cartography/intel/airbyte/workspaces.py +63 -0
- cartography/intel/aws/__init__.py +24 -9
- cartography/intel/aws/acm.py +124 -0
- cartography/intel/aws/apigateway.py +253 -22
- cartography/intel/aws/apigatewayv2.py +116 -0
- cartography/intel/aws/cloudtrail.py +17 -39
- cartography/intel/aws/cloudtrail_management_events.py +962 -0
- cartography/intel/aws/cloudwatch.py +150 -4
- cartography/intel/aws/codebuild.py +132 -0
- cartography/intel/aws/cognito.py +201 -0
- cartography/intel/aws/config.py +7 -3
- cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
- cartography/intel/aws/ec2/instances.py +25 -1
- cartography/intel/aws/ec2/internet_gateways.py +4 -2
- cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
- cartography/intel/aws/ec2/network_interfaces.py +5 -1
- cartography/intel/aws/ec2/reserved_instances.py +3 -1
- cartography/intel/aws/ec2/security_groups.py +140 -122
- cartography/intel/aws/ec2/snapshots.py +47 -84
- cartography/intel/aws/ec2/subnets.py +37 -63
- cartography/intel/aws/ec2/tgw.py +11 -5
- cartography/intel/aws/ec2/volumes.py +1 -1
- cartography/intel/aws/ec2/vpc.py +140 -124
- cartography/intel/aws/ec2/vpc_peerings.py +262 -125
- cartography/intel/aws/ecr.py +269 -98
- cartography/intel/aws/ecr_image_layers.py +923 -0
- cartography/intel/aws/ecs.py +251 -380
- cartography/intel/aws/efs.py +179 -11
- cartography/intel/aws/elasticache.py +102 -79
- cartography/intel/aws/elasticsearch.py +13 -4
- cartography/intel/aws/eventbridge.py +164 -0
- cartography/intel/aws/glue.py +181 -0
- cartography/intel/aws/guardduty.py +443 -0
- cartography/intel/aws/iam.py +750 -493
- cartography/intel/aws/identitycenter.py +605 -83
- cartography/intel/aws/inspector.py +221 -105
- cartography/intel/aws/kms.py +173 -201
- cartography/intel/aws/lambda_function.py +272 -189
- cartography/intel/aws/organizations.py +10 -9
- cartography/intel/aws/permission_relationships.py +10 -20
- cartography/intel/aws/rds.py +337 -446
- cartography/intel/aws/redshift.py +9 -4
- cartography/intel/aws/resourcegroupstaggingapi.py +78 -19
- cartography/intel/aws/resources.py +18 -0
- cartography/intel/aws/route53.py +386 -332
- cartography/intel/aws/s3.py +322 -14
- cartography/intel/aws/secretsmanager.py +81 -49
- cartography/intel/aws/securityhub.py +3 -1
- cartography/intel/aws/sns.py +62 -2
- cartography/intel/aws/sqs.py +36 -90
- cartography/intel/aws/ssm.py +3 -5
- cartography/intel/azure/__init__.py +202 -48
- cartography/intel/azure/aks.py +175 -0
- cartography/intel/azure/app_service.py +105 -0
- cartography/intel/azure/compute.py +59 -112
- cartography/intel/azure/container_instances.py +95 -0
- cartography/intel/azure/cosmosdb.py +222 -361
- cartography/intel/azure/data_factory.py +85 -0
- cartography/intel/azure/data_factory_dataset.py +128 -0
- cartography/intel/azure/data_factory_linked_service.py +119 -0
- cartography/intel/azure/data_factory_pipeline.py +142 -0
- cartography/intel/azure/data_lake.py +124 -0
- cartography/intel/azure/event_grid.py +94 -0
- cartography/intel/azure/functions.py +124 -0
- cartography/intel/azure/load_balancers.py +263 -0
- cartography/intel/azure/logic_apps.py +101 -0
- cartography/intel/azure/monitor.py +105 -0
- cartography/intel/azure/network.py +467 -0
- cartography/intel/azure/permission_relationships.py +466 -0
- cartography/intel/azure/rbac.py +309 -0
- cartography/intel/azure/resource_groups.py +82 -0
- cartography/intel/azure/security_center.py +106 -0
- cartography/intel/azure/sql.py +145 -292
- cartography/intel/azure/storage.py +185 -262
- cartography/intel/azure/subscription.py +21 -43
- cartography/intel/azure/tenant.py +39 -30
- cartography/intel/azure/util/common.py +13 -0
- cartography/intel/azure/util/credentials.py +49 -174
- cartography/intel/azure/util/tag.py +41 -0
- cartography/intel/create_indexes.py +2 -1
- cartography/intel/crowdstrike/spotlight.py +5 -2
- cartography/intel/dns.py +5 -2
- cartography/intel/entra/__init__.py +100 -1
- cartography/intel/entra/app_role_assignments.py +284 -0
- cartography/intel/entra/applications.py +182 -0
- cartography/intel/entra/federation/__init__.py +0 -0
- cartography/intel/entra/federation/aws_identity_center.py +77 -0
- cartography/intel/entra/groups.py +198 -0
- cartography/intel/entra/ou.py +48 -24
- cartography/intel/entra/service_principals.py +217 -0
- cartography/intel/entra/users.py +105 -57
- cartography/intel/gcp/__init__.py +334 -396
- cartography/intel/gcp/bigtable_app_profile.py +101 -0
- cartography/intel/gcp/bigtable_backup.py +91 -0
- cartography/intel/gcp/bigtable_cluster.py +93 -0
- cartography/intel/gcp/bigtable_instance.py +86 -0
- cartography/intel/gcp/bigtable_table.py +87 -0
- cartography/intel/gcp/cai.py +292 -0
- cartography/intel/gcp/clients.py +112 -0
- cartography/intel/gcp/compute.py +128 -119
- cartography/intel/gcp/crm/__init__.py +0 -0
- cartography/intel/gcp/crm/folders.py +114 -0
- cartography/intel/gcp/crm/orgs.py +70 -0
- cartography/intel/gcp/crm/projects.py +120 -0
- cartography/intel/gcp/dns.py +83 -169
- cartography/intel/gcp/gke.py +72 -113
- cartography/intel/gcp/iam.py +111 -91
- cartography/intel/gcp/permission_relationships.py +394 -0
- cartography/intel/gcp/policy_bindings.py +225 -0
- cartography/intel/gcp/storage.py +75 -159
- cartography/intel/github/__init__.py +62 -25
- cartography/intel/github/commits.py +423 -0
- cartography/intel/github/repos.py +463 -85
- cartography/intel/github/teams.py +3 -3
- cartography/intel/github/users.py +5 -0
- cartography/intel/github/util.py +12 -0
- cartography/intel/googleworkspace/__init__.py +193 -0
- cartography/intel/googleworkspace/devices.py +254 -0
- cartography/intel/googleworkspace/groups.py +568 -0
- cartography/intel/googleworkspace/oauth_apps.py +259 -0
- cartography/intel/googleworkspace/tenant.py +85 -0
- cartography/intel/googleworkspace/users.py +138 -0
- cartography/intel/gsuite/__init__.py +17 -9
- cartography/intel/gsuite/groups.py +291 -0
- cartography/intel/gsuite/users.py +142 -0
- cartography/intel/jamf/computers.py +7 -1
- cartography/intel/keycloak/__init__.py +153 -0
- cartography/intel/keycloak/authenticationexecutions.py +322 -0
- cartography/intel/keycloak/authenticationflows.py +77 -0
- cartography/intel/keycloak/clients.py +187 -0
- cartography/intel/keycloak/groups.py +126 -0
- cartography/intel/keycloak/identityproviders.py +94 -0
- cartography/intel/keycloak/organizations.py +163 -0
- cartography/intel/keycloak/realms.py +61 -0
- cartography/intel/keycloak/roles.py +202 -0
- cartography/intel/keycloak/scopes.py +73 -0
- cartography/intel/keycloak/users.py +70 -0
- cartography/intel/keycloak/util.py +47 -0
- cartography/intel/kubernetes/__init__.py +60 -14
- cartography/intel/kubernetes/clusters.py +86 -0
- cartography/intel/kubernetes/eks.py +402 -0
- cartography/intel/kubernetes/namespaces.py +59 -57
- cartography/intel/kubernetes/pods.py +168 -75
- cartography/intel/kubernetes/rbac.py +597 -0
- cartography/intel/kubernetes/secrets.py +95 -45
- cartography/intel/kubernetes/services.py +131 -67
- cartography/intel/kubernetes/util.py +142 -14
- cartography/intel/oci/iam.py +23 -9
- cartography/intel/oci/organizations.py +3 -1
- cartography/intel/oci/utils.py +28 -5
- cartography/intel/okta/applications.py +15 -5
- cartography/intel/okta/awssaml.py +14 -10
- cartography/intel/okta/factors.py +3 -1
- cartography/intel/okta/groups.py +5 -2
- cartography/intel/okta/organization.py +3 -1
- cartography/intel/okta/origins.py +3 -1
- cartography/intel/okta/roles.py +5 -2
- cartography/intel/okta/users.py +10 -2
- cartography/intel/ontology/__init__.py +44 -0
- cartography/intel/ontology/devices.py +54 -0
- cartography/intel/ontology/users.py +54 -0
- cartography/intel/ontology/utils.py +176 -0
- cartography/intel/pagerduty/escalation_policies.py +13 -6
- cartography/intel/pagerduty/schedules.py +9 -4
- cartography/intel/pagerduty/services.py +7 -3
- cartography/intel/pagerduty/teams.py +5 -2
- cartography/intel/pagerduty/users.py +3 -1
- cartography/intel/pagerduty/vendors.py +3 -1
- cartography/intel/scaleway/__init__.py +127 -0
- cartography/intel/scaleway/iam/__init__.py +0 -0
- cartography/intel/scaleway/iam/apikeys.py +71 -0
- cartography/intel/scaleway/iam/applications.py +71 -0
- cartography/intel/scaleway/iam/groups.py +71 -0
- cartography/intel/scaleway/iam/users.py +71 -0
- cartography/intel/scaleway/instances/__init__.py +0 -0
- cartography/intel/scaleway/instances/flexibleips.py +86 -0
- cartography/intel/scaleway/instances/instances.py +92 -0
- cartography/intel/scaleway/projects.py +79 -0
- cartography/intel/scaleway/storage/__init__.py +0 -0
- cartography/intel/scaleway/storage/snapshots.py +86 -0
- cartography/intel/scaleway/storage/volumes.py +84 -0
- cartography/intel/scaleway/utils.py +37 -0
- cartography/intel/sentinelone/__init__.py +75 -0
- cartography/intel/sentinelone/account.py +140 -0
- cartography/intel/sentinelone/agent.py +139 -0
- cartography/intel/sentinelone/api.py +124 -0
- cartography/intel/sentinelone/application.py +248 -0
- cartography/intel/sentinelone/cve.py +119 -0
- cartography/intel/sentinelone/utils.py +28 -0
- cartography/intel/slack/__init__.py +78 -0
- cartography/intel/slack/channels.py +80 -0
- cartography/intel/slack/groups.py +90 -0
- cartography/intel/slack/teams.py +65 -0
- cartography/intel/slack/users.py +57 -0
- cartography/intel/slack/utils.py +29 -0
- cartography/intel/spacelift/__init__.py +161 -0
- cartography/intel/spacelift/account.py +73 -0
- cartography/intel/spacelift/ec2_ownership.py +280 -0
- cartography/intel/spacelift/runs.py +463 -0
- cartography/intel/spacelift/spaces.py +112 -0
- cartography/intel/spacelift/stacks.py +119 -0
- cartography/intel/spacelift/util.py +122 -0
- cartography/intel/spacelift/workerpools.py +131 -0
- cartography/intel/spacelift/workers.py +128 -0
- cartography/intel/trivy/__init__.py +272 -0
- cartography/intel/trivy/scanner.py +386 -0
- cartography/models/airbyte/__init__.py +0 -0
- cartography/models/airbyte/connection.py +138 -0
- cartography/models/airbyte/destination.py +75 -0
- cartography/models/airbyte/organization.py +19 -0
- cartography/models/airbyte/source.py +75 -0
- cartography/models/airbyte/stream.py +74 -0
- cartography/models/airbyte/tag.py +69 -0
- cartography/models/airbyte/user.py +115 -0
- cartography/models/airbyte/workspace.py +46 -0
- cartography/models/anthropic/apikey.py +4 -0
- cartography/models/anthropic/user.py +4 -0
- cartography/models/aws/acm/__init__.py +0 -0
- cartography/models/aws/acm/certificate.py +75 -0
- cartography/models/aws/apigateway/__init__.py +0 -0
- cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
- cartography/models/aws/apigateway/apigatewayintegration.py +79 -0
- cartography/models/aws/apigateway/apigatewaymethod.py +74 -0
- cartography/models/aws/apigatewayv2/__init__.py +0 -0
- cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
- cartography/models/aws/cloudtrail/management_events.py +153 -0
- cartography/models/aws/cloudtrail/trail.py +45 -0
- cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
- cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
- cartography/models/aws/codebuild/__init__.py +0 -0
- cartography/models/aws/codebuild/project.py +49 -0
- cartography/models/aws/cognito/__init__.py +0 -0
- cartography/models/aws/cognito/identity_pool.py +70 -0
- cartography/models/aws/cognito/user_pool.py +47 -0
- cartography/models/aws/dynamodb/tables.py +2 -0
- cartography/models/aws/ec2/instances.py +25 -1
- cartography/models/aws/ec2/networkinterfaces.py +4 -0
- cartography/models/aws/ec2/security_group_rules.py +109 -0
- cartography/models/aws/ec2/security_groups.py +90 -0
- cartography/models/aws/ec2/snapshots.py +58 -0
- cartography/models/aws/ec2/subnet_instance.py +2 -0
- cartography/models/aws/ec2/subnet_networkinterface.py +2 -0
- cartography/models/aws/ec2/subnets.py +65 -0
- cartography/models/aws/ec2/volumes.py +20 -0
- cartography/models/aws/ec2/vpc.py +46 -0
- cartography/models/aws/ec2/vpc_cidr.py +102 -0
- cartography/models/aws/ec2/vpc_peering.py +157 -0
- cartography/models/aws/ecr/__init__.py +0 -0
- cartography/models/aws/ecr/image.py +146 -0
- cartography/models/aws/ecr/image_layer.py +107 -0
- cartography/models/aws/ecr/repository.py +72 -0
- cartography/models/aws/ecr/repository_image.py +95 -0
- cartography/models/aws/ecs/__init__.py +0 -0
- cartography/models/aws/ecs/clusters.py +64 -0
- cartography/models/aws/ecs/container_definitions.py +93 -0
- cartography/models/aws/ecs/container_instances.py +84 -0
- cartography/models/aws/ecs/containers.py +101 -0
- cartography/models/aws/ecs/services.py +134 -0
- cartography/models/aws/ecs/task_definitions.py +135 -0
- cartography/models/aws/ecs/tasks.py +134 -0
- cartography/models/aws/efs/access_point.py +77 -0
- cartography/models/aws/efs/file_system.py +60 -0
- cartography/models/aws/efs/mount_target.py +29 -2
- cartography/models/aws/elasticache/__init__.py +0 -0
- cartography/models/aws/elasticache/cluster.py +65 -0
- cartography/models/aws/elasticache/topic.py +67 -0
- cartography/models/aws/eventbridge/__init__.py +0 -0
- cartography/models/aws/eventbridge/rule.py +77 -0
- cartography/models/aws/eventbridge/target.py +71 -0
- cartography/models/aws/glue/__init__.py +0 -0
- cartography/models/aws/glue/connection.py +51 -0
- cartography/models/aws/glue/job.py +69 -0
- cartography/models/aws/guardduty/__init__.py +1 -0
- cartography/models/aws/guardduty/detectors.py +50 -0
- cartography/models/aws/guardduty/findings.py +121 -0
- cartography/models/aws/iam/access_key.py +103 -0
- cartography/models/aws/iam/account_role.py +24 -0
- cartography/models/aws/iam/federated_principal.py +60 -0
- cartography/models/aws/iam/group.py +60 -0
- cartography/models/aws/iam/group_membership.py +27 -0
- cartography/models/aws/iam/inline_policy.py +78 -0
- cartography/models/aws/iam/managed_policy.py +51 -0
- cartography/models/aws/iam/policy_statement.py +57 -0
- cartography/models/aws/iam/role.py +83 -0
- cartography/models/aws/iam/root_principal.py +52 -0
- cartography/models/aws/iam/service_principal.py +30 -0
- cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
- cartography/models/aws/iam/user.py +59 -0
- cartography/models/aws/identitycenter/awsidentitycenter.py +1 -0
- cartography/models/aws/identitycenter/awspermissionset.py +70 -0
- cartography/models/aws/identitycenter/awssogroup.py +70 -0
- cartography/models/aws/identitycenter/awsssouser.py +49 -9
- cartography/models/aws/inspector/findings.py +37 -0
- cartography/models/aws/inspector/packages.py +1 -31
- cartography/models/aws/kms/__init__.py +0 -0
- cartography/models/aws/kms/aliases.py +86 -0
- cartography/models/aws/kms/grants.py +65 -0
- cartography/models/aws/kms/keys.py +88 -0
- cartography/models/aws/lambda_function/__init__.py +0 -0
- cartography/models/aws/lambda_function/alias.py +74 -0
- cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
- cartography/models/aws/lambda_function/lambda_function.py +91 -0
- cartography/models/aws/lambda_function/layer.py +72 -0
- cartography/models/aws/rds/__init__.py +0 -0
- cartography/models/aws/rds/cluster.py +91 -0
- cartography/models/aws/rds/event_subscription.py +146 -0
- cartography/models/aws/rds/instance.py +156 -0
- cartography/models/aws/rds/snapshot.py +108 -0
- cartography/models/aws/rds/subnet_group.py +101 -0
- cartography/models/aws/route53/__init__.py +0 -0
- cartography/models/aws/route53/dnsrecord.py +235 -0
- cartography/models/aws/route53/nameserver.py +63 -0
- cartography/models/aws/route53/subzone.py +40 -0
- cartography/models/aws/route53/zone.py +47 -0
- cartography/models/aws/s3/notification.py +24 -0
- cartography/models/aws/secretsmanager/secret.py +106 -0
- cartography/models/aws/secretsmanager/secret_version.py +0 -2
- cartography/models/aws/sns/topic_subscription.py +74 -0
- cartography/models/aws/sqs/__init__.py +0 -0
- cartography/models/aws/sqs/queue.py +89 -0
- cartography/models/azure/__init__.py +0 -0
- cartography/models/azure/aks_cluster.py +54 -0
- cartography/models/azure/aks_nodepool.py +54 -0
- cartography/models/azure/app_service.py +59 -0
- cartography/models/azure/container_instance.py +57 -0
- cartography/models/azure/cosmosdb/__init__.py +0 -0
- cartography/models/azure/cosmosdb/account.py +77 -0
- cartography/models/azure/cosmosdb/accountfailoverpolicy.py +77 -0
- cartography/models/azure/cosmosdb/cassandrakeyspace.py +82 -0
- cartography/models/azure/cosmosdb/cassandratable.py +81 -0
- cartography/models/azure/cosmosdb/corspolicy.py +74 -0
- cartography/models/azure/cosmosdb/dblocation.py +120 -0
- cartography/models/azure/cosmosdb/mongodbcollection.py +82 -0
- cartography/models/azure/cosmosdb/mongodbdatabase.py +78 -0
- cartography/models/azure/cosmosdb/privateendpointconnection.py +81 -0
- cartography/models/azure/cosmosdb/sqlcontainer.py +88 -0
- cartography/models/azure/cosmosdb/sqldatabase.py +78 -0
- cartography/models/azure/cosmosdb/tableresource.py +76 -0
- cartography/models/azure/cosmosdb/virtualnetworkrule.py +78 -0
- cartography/models/azure/data_factory/__init__.py +0 -0
- cartography/models/azure/data_factory/data_factory.py +51 -0
- cartography/models/azure/data_factory/data_factory_dataset.py +94 -0
- cartography/models/azure/data_factory/data_factory_linked_service.py +78 -0
- cartography/models/azure/data_factory/data_factory_pipeline.py +93 -0
- cartography/models/azure/data_lake_filesystem.py +51 -0
- cartography/models/azure/event_grid_topic.py +57 -0
- cartography/models/azure/function_app.py +59 -0
- cartography/models/azure/load_balancer/__init__.py +0 -0
- cartography/models/azure/load_balancer/load_balancer.py +49 -0
- cartography/models/azure/load_balancer/load_balancer_backend_pool.py +73 -0
- cartography/models/azure/load_balancer/load_balancer_frontend_ip.py +75 -0
- cartography/models/azure/load_balancer/load_balancer_inbound_nat_rule.py +78 -0
- cartography/models/azure/load_balancer/load_balancer_rule.py +108 -0
- cartography/models/azure/logic_apps.py +56 -0
- cartography/models/azure/monitor.py +54 -0
- cartography/models/azure/network_interface.py +112 -0
- cartography/models/azure/network_security_group.py +50 -0
- cartography/models/azure/permission_relationships.py +60 -0
- cartography/models/azure/principal.py +41 -0
- cartography/models/azure/public_ip_address.py +50 -0
- cartography/models/azure/rbac.py +268 -0
- cartography/models/azure/resource_groups.py +52 -0
- cartography/models/azure/security_center.py +50 -0
- cartography/models/azure/sql/__init__.py +0 -0
- cartography/models/azure/sql/databasethreatdetectionpolicy.py +85 -0
- cartography/models/azure/sql/elasticpool.py +77 -0
- cartography/models/azure/sql/failovergroup.py +73 -0
- cartography/models/azure/sql/recoverabledatabase.py +75 -0
- cartography/models/azure/sql/replicationlink.py +81 -0
- cartography/models/azure/sql/restorabledroppeddatabase.py +82 -0
- cartography/models/azure/sql/restorepoint.py +74 -0
- cartography/models/azure/sql/serveradadministrator.py +74 -0
- cartography/models/azure/sql/serverdnsalias.py +71 -0
- cartography/models/azure/sql/sqldatabase.py +85 -0
- cartography/models/azure/sql/sqlserver.py +50 -0
- cartography/models/azure/sql/transparentdataencryption.py +76 -0
- cartography/models/azure/storage/__init__.py +0 -0
- cartography/models/azure/storage/account.py +59 -0
- cartography/models/azure/storage/blobcontainer.py +85 -0
- cartography/models/azure/storage/blobservice.py +71 -0
- cartography/models/azure/storage/fileservice.py +71 -0
- cartography/models/azure/storage/fileshare.py +82 -0
- cartography/models/azure/storage/queue.py +71 -0
- cartography/models/azure/storage/queueservice.py +73 -0
- cartography/models/azure/storage/table.py +72 -0
- cartography/models/azure/storage/tableservice.py +73 -0
- cartography/models/azure/subnet.py +101 -0
- cartography/models/azure/subscription.py +47 -0
- cartography/models/azure/tags/__init__.py +0 -0
- cartography/models/azure/tags/storage_tag.py +40 -0
- cartography/models/azure/tags/tag.py +37 -0
- cartography/models/azure/tenant.py +17 -0
- cartography/models/azure/virtual_network.py +49 -0
- cartography/models/azure/vm/__init__.py +0 -0
- cartography/models/azure/vm/datadisk.py +80 -0
- cartography/models/azure/vm/disk.py +55 -0
- cartography/models/azure/vm/snapshot.py +56 -0
- cartography/models/azure/vm/virtualmachine.py +59 -0
- cartography/models/bigfix/bigfix_computer.py +1 -1
- cartography/models/cloudflare/member.py +4 -0
- cartography/models/core/common.py +1 -0
- cartography/models/core/nodes.py +15 -2
- cartography/models/core/relationships.py +44 -0
- cartography/models/crowdstrike/hosts.py +1 -1
- cartography/models/digitalocean/droplet.py +2 -0
- cartography/models/duo/endpoint.py +1 -1
- cartography/models/duo/phone.py +2 -2
- cartography/models/duo/user.py +4 -0
- cartography/models/entra/app_role_assignment.py +115 -0
- cartography/models/entra/application.py +49 -0
- cartography/models/entra/entra_user_to_aws_sso.py +41 -0
- cartography/models/entra/group.py +117 -0
- cartography/models/entra/service_principal.py +104 -0
- cartography/models/entra/user.py +42 -51
- cartography/models/gcp/__init__.py +0 -0
- cartography/models/gcp/bigtable/__init__.py +0 -0
- cartography/models/gcp/bigtable/app_profile.py +94 -0
- cartography/models/gcp/bigtable/backup.py +91 -0
- cartography/models/gcp/bigtable/cluster.py +73 -0
- cartography/models/gcp/bigtable/instance.py +52 -0
- cartography/models/gcp/bigtable/table.py +69 -0
- cartography/models/gcp/compute/__init__.py +0 -0
- cartography/models/gcp/compute/subnet.py +74 -0
- cartography/models/gcp/compute/vpc.py +50 -0
- cartography/models/gcp/crm/__init__.py +0 -0
- cartography/models/gcp/crm/folders.py +98 -0
- cartography/models/gcp/crm/organizations.py +21 -0
- cartography/models/gcp/crm/projects.py +100 -0
- cartography/models/gcp/dns.py +109 -0
- cartography/models/gcp/gke.py +69 -0
- cartography/models/gcp/iam.py +3 -0
- cartography/models/gcp/permission_relationships.py +61 -0
- cartography/models/gcp/policy_bindings.py +93 -0
- cartography/models/gcp/storage/__init__.py +0 -0
- cartography/models/gcp/storage/bucket.py +119 -0
- cartography/models/github/commits.py +63 -0
- cartography/models/github/dependencies.py +73 -0
- cartography/models/github/manifests.py +49 -0
- cartography/models/github/users.py +10 -0
- cartography/models/googleworkspace/__init__.py +0 -0
- cartography/models/googleworkspace/device.py +132 -0
- cartography/models/googleworkspace/group.py +382 -0
- cartography/models/googleworkspace/oauth_app.py +124 -0
- cartography/models/googleworkspace/tenant.py +30 -0
- cartography/models/googleworkspace/user.py +113 -0
- cartography/models/gsuite/__init__.py +0 -0
- cartography/models/gsuite/group.py +218 -0
- cartography/models/gsuite/tenant.py +29 -0
- cartography/models/gsuite/user.py +107 -0
- cartography/models/kandji/device.py +1 -2
- cartography/models/keycloak/__init__.py +0 -0
- cartography/models/keycloak/authenticationexecution.py +160 -0
- cartography/models/keycloak/authenticationflow.py +54 -0
- cartography/models/keycloak/client.py +179 -0
- cartography/models/keycloak/group.py +101 -0
- cartography/models/keycloak/identityprovider.py +89 -0
- cartography/models/keycloak/organization.py +116 -0
- cartography/models/keycloak/organizationdomain.py +73 -0
- cartography/models/keycloak/realm.py +173 -0
- cartography/models/keycloak/role.py +126 -0
- cartography/models/keycloak/scope.py +73 -0
- cartography/models/keycloak/user.py +55 -0
- cartography/models/kubernetes/__init__.py +0 -0
- cartography/models/kubernetes/clusterrolebindings.py +138 -0
- cartography/models/kubernetes/clusterroles.py +52 -0
- cartography/models/kubernetes/clusters.py +26 -0
- cartography/models/kubernetes/containers.py +133 -0
- cartography/models/kubernetes/groups.py +107 -0
- cartography/models/kubernetes/namespaces.py +51 -0
- cartography/models/kubernetes/oidc.py +51 -0
- cartography/models/kubernetes/pods.py +80 -0
- cartography/models/kubernetes/rolebindings.py +159 -0
- cartography/models/kubernetes/roles.py +76 -0
- cartography/models/kubernetes/secrets.py +79 -0
- cartography/models/kubernetes/serviceaccounts.py +77 -0
- cartography/models/kubernetes/services.py +108 -0
- cartography/models/kubernetes/users.py +105 -0
- cartography/models/lastpass/user.py +4 -0
- cartography/models/ontology/__init__.py +0 -0
- cartography/models/ontology/device.py +137 -0
- cartography/models/ontology/mapping/__init__.py +76 -0
- cartography/models/ontology/mapping/data/__init__.py +0 -0
- cartography/models/ontology/mapping/data/apikeys.py +93 -0
- cartography/models/ontology/mapping/data/computeinstance.py +95 -0
- cartography/models/ontology/mapping/data/containers.py +88 -0
- cartography/models/ontology/mapping/data/databases.py +182 -0
- cartography/models/ontology/mapping/data/devices.py +194 -0
- cartography/models/ontology/mapping/data/thirdpartyapps.py +140 -0
- cartography/models/ontology/mapping/data/useraccounts.py +416 -0
- cartography/models/ontology/mapping/data/users.py +63 -0
- cartography/models/ontology/mapping/specs.py +85 -0
- cartography/models/ontology/user.py +51 -0
- cartography/models/openai/adminapikey.py +4 -0
- cartography/models/openai/apikey.py +4 -0
- cartography/models/openai/user.py +4 -0
- cartography/models/scaleway/__init__.py +0 -0
- cartography/models/scaleway/iam/__init__.py +0 -0
- cartography/models/scaleway/iam/apikey.py +100 -0
- cartography/models/scaleway/iam/application.py +52 -0
- cartography/models/scaleway/iam/group.py +95 -0
- cartography/models/scaleway/iam/user.py +64 -0
- cartography/models/scaleway/instance/__init__.py +0 -0
- cartography/models/scaleway/instance/flexibleip.py +52 -0
- cartography/models/scaleway/instance/instance.py +120 -0
- cartography/models/scaleway/organization.py +19 -0
- cartography/models/scaleway/project.py +48 -0
- cartography/models/scaleway/storage/__init__.py +0 -0
- cartography/models/scaleway/storage/snapshot.py +78 -0
- cartography/models/scaleway/storage/volume.py +51 -0
- cartography/models/sentinelone/__init__.py +1 -0
- cartography/models/sentinelone/account.py +40 -0
- cartography/models/sentinelone/agent.py +50 -0
- cartography/models/sentinelone/application.py +44 -0
- cartography/models/sentinelone/application_version.py +96 -0
- cartography/models/sentinelone/cve.py +73 -0
- cartography/models/slack/__init__.py +0 -0
- cartography/models/slack/channels.py +92 -0
- cartography/models/slack/group.py +129 -0
- cartography/models/slack/team.py +22 -0
- cartography/models/slack/user.py +62 -0
- cartography/models/snipeit/asset.py +2 -0
- cartography/models/snipeit/user.py +4 -0
- cartography/models/spacelift/__init__.py +0 -0
- cartography/models/spacelift/cloudtrailevent.py +120 -0
- cartography/models/spacelift/run.py +162 -0
- cartography/models/spacelift/space.py +131 -0
- cartography/models/spacelift/spaceliftaccount.py +31 -0
- cartography/models/spacelift/spaceliftgitcommit.py +157 -0
- cartography/models/spacelift/stack.py +96 -0
- cartography/models/spacelift/user.py +63 -0
- cartography/models/spacelift/worker.py +97 -0
- cartography/models/spacelift/workerpool.py +90 -0
- cartography/models/tailscale/device.py +2 -1
- cartography/models/tailscale/user.py +6 -1
- cartography/models/trivy/__init__.py +0 -0
- cartography/models/trivy/findings.py +66 -0
- cartography/models/trivy/fix.py +66 -0
- cartography/models/trivy/package.py +71 -0
- cartography/rules/README.md +1 -0
- cartography/rules/__init__.py +0 -0
- cartography/rules/cli.py +261 -0
- cartography/rules/data/__init__.py +0 -0
- cartography/rules/data/rules/__init__.py +46 -0
- cartography/rules/data/rules/cloud_security_product_deactivated.py +49 -0
- cartography/rules/data/rules/compute_instance_exposed.py +51 -0
- cartography/rules/data/rules/database_instance_exposed.py +53 -0
- cartography/rules/data/rules/delegation_boundary_modifiable.py +90 -0
- cartography/rules/data/rules/identity_administration_privileges.py +100 -0
- cartography/rules/data/rules/inactive_user_active_accounts.py +48 -0
- cartography/rules/data/rules/malicious_npm_dependencies_shai_hulud.py +2222 -0
- cartography/rules/data/rules/mfa_missing.py +46 -0
- cartography/rules/data/rules/object_storage_public.py +100 -0
- cartography/rules/data/rules/policy_administration_privileges.py +104 -0
- cartography/rules/data/rules/unmanaged_accounts.py +43 -0
- cartography/rules/data/rules/workload_identity_admin_capabilities.py +193 -0
- cartography/rules/formatters.py +108 -0
- cartography/rules/runners.py +216 -0
- cartography/rules/spec/__init__.py +0 -0
- cartography/rules/spec/model.py +267 -0
- cartography/rules/spec/result.py +38 -0
- cartography/sync.py +25 -5
- cartography/util.py +101 -31
- {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/METADATA +61 -22
- cartography-0.123.0.dist-info/RECORD +856 -0
- {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/entry_points.txt +1 -0
- cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
- cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
- cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
- cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
- cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
- cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
- cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
- cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
- cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
- cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
- cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_vpc_peering_cleanup.json +0 -45
- cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
- cartography/data/jobs/cleanup/azure_cosmosdb_cassandra_keyspace_cleanup.json +0 -25
- cartography/data/jobs/cleanup/azure_cosmosdb_cors_details.json +0 -15
- cartography/data/jobs/cleanup/azure_cosmosdb_mongodb_database_cleanup.json +0 -25
- cartography/data/jobs/cleanup/azure_cosmosdb_sql_database_cleanup.json +0 -25
- cartography/data/jobs/cleanup/azure_cosmosdb_table_resources_cleanup.json +0 -15
- cartography/data/jobs/cleanup/azure_database_account_cleanup.json +0 -85
- cartography/data/jobs/cleanup/azure_import_disks_cleanup.json +0 -15
- cartography/data/jobs/cleanup/azure_import_snapshots_cleanup.json +0 -15
- cartography/data/jobs/cleanup/azure_import_virtual_machines_cleanup.json +0 -25
- cartography/data/jobs/cleanup/azure_sql_server_cleanup.json +0 -125
- cartography/data/jobs/cleanup/azure_storage_account_cleanup.json +0 -95
- cartography/data/jobs/cleanup/azure_subscriptions_cleanup.json +0 -14
- cartography/data/jobs/cleanup/azure_tenant_cleanup.json +0 -9
- cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
- cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
- cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
- cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
- cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
- cartography/intel/gcp/crm.py +0 -355
- cartography/intel/gsuite/api.py +0 -342
- cartography-0.104.0rc2.dist-info/RECORD +0 -455
- /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
- /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
- /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
- /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
- /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
- {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/WHEEL +0 -0
- {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import configparser
|
|
2
2
|
import logging
|
|
3
|
+
from collections import defaultdict
|
|
3
4
|
from collections import namedtuple
|
|
4
5
|
from string import Template
|
|
5
6
|
from typing import Any
|
|
7
|
+
from typing import cast
|
|
6
8
|
from typing import Dict
|
|
7
9
|
from typing import List
|
|
8
10
|
from typing import Optional
|
|
@@ -12,8 +14,13 @@ from packaging.requirements import InvalidRequirement
|
|
|
12
14
|
from packaging.requirements import Requirement
|
|
13
15
|
from packaging.utils import canonicalize_name
|
|
14
16
|
|
|
17
|
+
from cartography.client.core.tx import execute_write_with_retry
|
|
18
|
+
from cartography.client.core.tx import load as load_data
|
|
19
|
+
from cartography.graph.job import GraphJob
|
|
15
20
|
from cartography.intel.github.util import fetch_all
|
|
16
21
|
from cartography.intel.github.util import PaginatedGraphqlData
|
|
22
|
+
from cartography.models.github.dependencies import GitHubDependencySchema
|
|
23
|
+
from cartography.models.github.manifests import DependencyGraphManifestSchema
|
|
17
24
|
from cartography.util import backoff_handler
|
|
18
25
|
from cartography.util import retries_with_backoff
|
|
19
26
|
from cartography.util import run_cleanup_job
|
|
@@ -36,12 +43,12 @@ UserAffiliationAndRepoPermission = namedtuple(
|
|
|
36
43
|
|
|
37
44
|
|
|
38
45
|
GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
39
|
-
query($login: String!, $cursor: String) {
|
|
46
|
+
query($login: String!, $cursor: String, $count: Int!) {
|
|
40
47
|
organization(login: $login)
|
|
41
48
|
{
|
|
42
49
|
url
|
|
43
50
|
login
|
|
44
|
-
repositories(first:
|
|
51
|
+
repositories(first: $count, after: $cursor){
|
|
45
52
|
pageInfo{
|
|
46
53
|
endCursor
|
|
47
54
|
hasNextPage
|
|
@@ -93,6 +100,18 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
93
100
|
text
|
|
94
101
|
}
|
|
95
102
|
}
|
|
103
|
+
dependencyGraphManifests(first: 20) {
|
|
104
|
+
nodes {
|
|
105
|
+
blobPath
|
|
106
|
+
dependencies(first: 100) {
|
|
107
|
+
nodes {
|
|
108
|
+
packageName
|
|
109
|
+
requirements
|
|
110
|
+
packageManager
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
96
115
|
}
|
|
97
116
|
}
|
|
98
117
|
}
|
|
@@ -140,25 +159,38 @@ def _get_repo_collaborators_inner_func(
|
|
|
140
159
|
org: str,
|
|
141
160
|
api_url: str,
|
|
142
161
|
token: str,
|
|
143
|
-
repo_raw_data: list[dict[str, Any]],
|
|
162
|
+
repo_raw_data: list[dict[str, Any] | None],
|
|
144
163
|
affiliation: str,
|
|
145
|
-
collab_users: list[dict[str, Any]],
|
|
146
|
-
collab_permission: list[str],
|
|
147
164
|
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
|
|
148
165
|
result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
149
166
|
|
|
150
167
|
for repo in repo_raw_data:
|
|
168
|
+
# GitHub can return null repo entries. See issues #1334 and #1404.
|
|
169
|
+
if repo is None:
|
|
170
|
+
logger.info(
|
|
171
|
+
"Skipping null repository entry while fetching %s collaborators.",
|
|
172
|
+
affiliation,
|
|
173
|
+
)
|
|
174
|
+
continue
|
|
151
175
|
repo_name = repo["name"]
|
|
152
176
|
repo_url = repo["url"]
|
|
153
177
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
178
|
+
# Guard against None when collaborator fields are not accessible due to permissions.
|
|
179
|
+
direct_info = repo.get("directCollaborators")
|
|
180
|
+
outside_info = repo.get("outsideCollaborators")
|
|
181
|
+
|
|
182
|
+
if affiliation == "OUTSIDE":
|
|
183
|
+
total_outside = 0 if not outside_info else outside_info.get("totalCount", 0)
|
|
184
|
+
if total_outside == 0:
|
|
185
|
+
# No outside collaborators or not permitted to view; skip API calls for this repo.
|
|
186
|
+
result[repo_url] = []
|
|
187
|
+
continue
|
|
188
|
+
else: # DIRECT
|
|
189
|
+
total_direct = 0 if not direct_info else direct_info.get("totalCount", 0)
|
|
190
|
+
if total_direct == 0:
|
|
191
|
+
# No direct collaborators or not permitted to view; skip API calls for this repo.
|
|
192
|
+
result[repo_url] = []
|
|
193
|
+
continue
|
|
162
194
|
|
|
163
195
|
logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
|
|
164
196
|
collaborators = _get_repo_collaborators(
|
|
@@ -169,6 +201,9 @@ def _get_repo_collaborators_inner_func(
|
|
|
169
201
|
affiliation,
|
|
170
202
|
)
|
|
171
203
|
|
|
204
|
+
collab_users: List[dict[str, Any]] = []
|
|
205
|
+
collab_permission: List[str] = []
|
|
206
|
+
|
|
172
207
|
# nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
|
|
173
208
|
# however sometimes GitHub returns None, as in issue 1334 and 1404.
|
|
174
209
|
for collab in collaborators.nodes or []:
|
|
@@ -186,7 +221,7 @@ def _get_repo_collaborators_inner_func(
|
|
|
186
221
|
|
|
187
222
|
|
|
188
223
|
def _get_repo_collaborators_for_multiple_repos(
|
|
189
|
-
repo_raw_data: list[dict[str, Any]],
|
|
224
|
+
repo_raw_data: list[dict[str, Any] | None],
|
|
190
225
|
affiliation: str,
|
|
191
226
|
org: str,
|
|
192
227
|
api_url: str,
|
|
@@ -205,8 +240,6 @@ def _get_repo_collaborators_for_multiple_repos(
|
|
|
205
240
|
logger.info(
|
|
206
241
|
f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".',
|
|
207
242
|
)
|
|
208
|
-
collab_users: List[dict[str, Any]] = []
|
|
209
|
-
collab_permission: List[str] = []
|
|
210
243
|
|
|
211
244
|
result: dict[str, list[UserAffiliationAndRepoPermission]] = retries_with_backoff(
|
|
212
245
|
_get_repo_collaborators_inner_func,
|
|
@@ -219,8 +252,6 @@ def _get_repo_collaborators_for_multiple_repos(
|
|
|
219
252
|
token=token,
|
|
220
253
|
repo_raw_data=repo_raw_data,
|
|
221
254
|
affiliation=affiliation,
|
|
222
|
-
collab_users=collab_users,
|
|
223
|
-
collab_permission=collab_permission,
|
|
224
255
|
)
|
|
225
256
|
return result
|
|
226
257
|
|
|
@@ -257,7 +288,7 @@ def _get_repo_collaborators(
|
|
|
257
288
|
|
|
258
289
|
|
|
259
290
|
@timeit
|
|
260
|
-
def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
291
|
+
def get(token: str, api_url: str, organization: str) -> List[Optional[Dict]]:
|
|
261
292
|
"""
|
|
262
293
|
Retrieve a list of repos from a Github organization as described in
|
|
263
294
|
https://docs.github.com/en/graphql/reference/objects#repository.
|
|
@@ -265,6 +296,8 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
265
296
|
:param api_url: The Github v4 API endpoint as string.
|
|
266
297
|
:param organization: The name of the target Github organization as string.
|
|
267
298
|
:return: A list of dicts representing repos. See tests.data.github.repos for data shape.
|
|
299
|
+
Note: The list may contain None entries per GraphQL spec when resolvers error
|
|
300
|
+
(permissions, rate limits, transient issues). See issues #1334 and #1404.
|
|
268
301
|
"""
|
|
269
302
|
# TODO: link the Github organization to the repositories
|
|
270
303
|
repos, _ = fetch_all(
|
|
@@ -273,12 +306,17 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
273
306
|
organization,
|
|
274
307
|
GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
|
|
275
308
|
"repositories",
|
|
309
|
+
count=50,
|
|
276
310
|
)
|
|
277
|
-
|
|
311
|
+
# Cast is needed because GitHub's GraphQL RepositoryConnection.nodes is typed [Repository] (not [Repository!])
|
|
312
|
+
# per GraphQL spec, allowing null entries when resolvers error (permissions, rate limits, transient issues).
|
|
313
|
+
# See https://github.com/cartography-cncf/cartography/issues/1334
|
|
314
|
+
# and https://github.com/cartography-cncf/cartography/issues/1404
|
|
315
|
+
return cast(List[Optional[Dict]], repos.nodes)
|
|
278
316
|
|
|
279
317
|
|
|
280
318
|
def transform(
|
|
281
|
-
repos_json: List[Dict],
|
|
319
|
+
repos_json: List[Optional[Dict]],
|
|
282
320
|
direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
283
321
|
outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
284
322
|
) -> Dict:
|
|
@@ -291,8 +329,10 @@ def transform(
|
|
|
291
329
|
:param outside_collaborators: dict of repo URL to list of outside collaborators.
|
|
292
330
|
See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
|
|
293
331
|
:return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo
|
|
294
|
-
mapping,
|
|
332
|
+
mapping, Python requirements files (if any) in a repo, manifests from GitHub's dependency graph, and all
|
|
333
|
+
dependencies from GitHub's dependency graph.
|
|
295
334
|
"""
|
|
335
|
+
logger.info(f"Processing {len(repos_json)} GitHub repositories")
|
|
296
336
|
transformed_repo_list: List[Dict] = []
|
|
297
337
|
transformed_repo_languages: List[Dict] = []
|
|
298
338
|
transformed_repo_owners: List[Dict] = []
|
|
@@ -312,7 +352,13 @@ def transform(
|
|
|
312
352
|
"WRITE": [],
|
|
313
353
|
}
|
|
314
354
|
transformed_requirements_files: List[Dict] = []
|
|
355
|
+
transformed_dependencies: List[Dict] = []
|
|
356
|
+
transformed_manifests: List[Dict] = []
|
|
315
357
|
for repo_object in repos_json:
|
|
358
|
+
# GitHub can return null repo entries. See issues #1334 and #1404.
|
|
359
|
+
if repo_object is None:
|
|
360
|
+
logger.debug("Skipping null repository entry during transformation.")
|
|
361
|
+
continue
|
|
316
362
|
_transform_repo_languages(
|
|
317
363
|
repo_object["url"],
|
|
318
364
|
repo_object,
|
|
@@ -350,6 +396,16 @@ def transform(
|
|
|
350
396
|
repo_url,
|
|
351
397
|
transformed_requirements_files,
|
|
352
398
|
)
|
|
399
|
+
_transform_dependency_manifests(
|
|
400
|
+
repo_object.get("dependencyGraphManifests"),
|
|
401
|
+
repo_url,
|
|
402
|
+
transformed_manifests,
|
|
403
|
+
)
|
|
404
|
+
_transform_dependency_graph(
|
|
405
|
+
repo_object.get("dependencyGraphManifests"),
|
|
406
|
+
repo_url,
|
|
407
|
+
transformed_dependencies,
|
|
408
|
+
)
|
|
353
409
|
results = {
|
|
354
410
|
"repos": transformed_repo_list,
|
|
355
411
|
"repo_languages": transformed_repo_languages,
|
|
@@ -357,7 +413,10 @@ def transform(
|
|
|
357
413
|
"repo_outside_collaborators": transformed_outside_collaborators,
|
|
358
414
|
"repo_direct_collaborators": transformed_direct_collaborators,
|
|
359
415
|
"python_requirements": transformed_requirements_files,
|
|
416
|
+
"dependencies": transformed_dependencies,
|
|
417
|
+
"manifests": transformed_manifests,
|
|
360
418
|
}
|
|
419
|
+
|
|
361
420
|
return results
|
|
362
421
|
|
|
363
422
|
|
|
@@ -371,9 +430,16 @@ def _create_default_branch_id(repo_url: str, default_branch_ref_id: str) -> str:
|
|
|
371
430
|
|
|
372
431
|
def _create_git_url_from_ssh_url(ssh_url: str) -> str:
|
|
373
432
|
"""
|
|
374
|
-
|
|
433
|
+
Convert SSH URL to git:// URL.
|
|
434
|
+
Example:
|
|
435
|
+
git@github.com:cartography-cncf/cartography.git
|
|
436
|
+
-> git://github.com/cartography-cncf/cartography.git
|
|
375
437
|
"""
|
|
376
|
-
|
|
438
|
+
# Remove the user part (e.g., "git@")
|
|
439
|
+
_, host_and_path = ssh_url.split("@", 1)
|
|
440
|
+
# Replace first ':' (separating host and repo) with '/'
|
|
441
|
+
host, path = host_and_path.split(":", 1)
|
|
442
|
+
return f"git://{host}/{path}"
|
|
377
443
|
|
|
378
444
|
|
|
379
445
|
def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict]) -> None:
|
|
@@ -533,6 +599,153 @@ def _transform_setup_cfg_requirements(
|
|
|
533
599
|
_transform_python_requirements(requirements_list, repo_url, out_requirements_files)
|
|
534
600
|
|
|
535
601
|
|
|
602
|
+
def _transform_dependency_manifests(
|
|
603
|
+
dependency_manifests: Optional[Dict],
|
|
604
|
+
repo_url: str,
|
|
605
|
+
out_manifests_list: List[Dict],
|
|
606
|
+
) -> None:
|
|
607
|
+
"""
|
|
608
|
+
Transform GitHub dependency graph manifests into cartography manifest format.
|
|
609
|
+
:param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
|
|
610
|
+
:param repo_url: The URL of the GitHub repo
|
|
611
|
+
:param out_manifests_list: Output array to append transformed results to
|
|
612
|
+
:return: Nothing
|
|
613
|
+
"""
|
|
614
|
+
if not dependency_manifests or not dependency_manifests.get("nodes"):
|
|
615
|
+
return
|
|
616
|
+
|
|
617
|
+
manifests_added = 0
|
|
618
|
+
|
|
619
|
+
for manifest in dependency_manifests["nodes"]:
|
|
620
|
+
blob_path = manifest.get("blobPath", "")
|
|
621
|
+
if not blob_path:
|
|
622
|
+
continue
|
|
623
|
+
|
|
624
|
+
# Count dependencies in this manifest
|
|
625
|
+
dependencies = manifest.get("dependencies", {})
|
|
626
|
+
dependencies_count = len(dependencies.get("nodes", []) if dependencies else [])
|
|
627
|
+
|
|
628
|
+
# Create unique manifest ID by combining repo URL and blob path
|
|
629
|
+
manifest_id = f"{repo_url}#{blob_path}"
|
|
630
|
+
|
|
631
|
+
# Extract filename from blob path
|
|
632
|
+
filename = blob_path.split("/")[-1] if blob_path else "None"
|
|
633
|
+
|
|
634
|
+
out_manifests_list.append(
|
|
635
|
+
{
|
|
636
|
+
"id": manifest_id,
|
|
637
|
+
"blob_path": blob_path,
|
|
638
|
+
"filename": filename,
|
|
639
|
+
"dependencies_count": dependencies_count,
|
|
640
|
+
"repo_url": repo_url,
|
|
641
|
+
}
|
|
642
|
+
)
|
|
643
|
+
manifests_added += 1
|
|
644
|
+
|
|
645
|
+
if manifests_added > 0:
|
|
646
|
+
repo_name = repo_url.split("/")[-1] if repo_url else "repository"
|
|
647
|
+
logger.info(f"Found {manifests_added} dependency manifests in {repo_name}")
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _transform_dependency_graph(
|
|
651
|
+
dependency_manifests: Optional[Dict],
|
|
652
|
+
repo_url: str,
|
|
653
|
+
out_dependencies_list: List[Dict],
|
|
654
|
+
) -> None:
|
|
655
|
+
"""
|
|
656
|
+
Transform GitHub dependency graph manifests into cartography dependency format.
|
|
657
|
+
:param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
|
|
658
|
+
:param repo_url: The URL of the GitHub repo
|
|
659
|
+
:param out_dependencies_list: Output array to append transformed results to
|
|
660
|
+
:return: Nothing
|
|
661
|
+
"""
|
|
662
|
+
if not dependency_manifests or not dependency_manifests.get("nodes"):
|
|
663
|
+
return
|
|
664
|
+
|
|
665
|
+
dependencies_added = 0
|
|
666
|
+
|
|
667
|
+
for manifest in dependency_manifests["nodes"]:
|
|
668
|
+
dependencies = manifest.get("dependencies", {})
|
|
669
|
+
if not dependencies or not dependencies.get("nodes"):
|
|
670
|
+
continue
|
|
671
|
+
|
|
672
|
+
manifest_path = manifest.get("blobPath", "")
|
|
673
|
+
|
|
674
|
+
for dep in dependencies["nodes"]:
|
|
675
|
+
package_name = dep.get("packageName")
|
|
676
|
+
if not package_name:
|
|
677
|
+
continue
|
|
678
|
+
|
|
679
|
+
requirements = dep.get("requirements", "")
|
|
680
|
+
package_manager = dep.get("packageManager", "").upper()
|
|
681
|
+
|
|
682
|
+
# Create ecosystem-specific canonical name
|
|
683
|
+
canonical_name = _canonicalize_dependency_name(
|
|
684
|
+
package_name, package_manager
|
|
685
|
+
)
|
|
686
|
+
|
|
687
|
+
# Create ecosystem identifier
|
|
688
|
+
ecosystem = package_manager.lower() if package_manager else "unknown"
|
|
689
|
+
|
|
690
|
+
# Create simple dependency ID using canonical name and requirements
|
|
691
|
+
# This allows the same dependency to be shared across multiple repos
|
|
692
|
+
requirements_for_id = (requirements or "").strip()
|
|
693
|
+
dependency_id = (
|
|
694
|
+
f"{canonical_name}|{requirements_for_id}"
|
|
695
|
+
if requirements_for_id
|
|
696
|
+
else canonical_name
|
|
697
|
+
)
|
|
698
|
+
|
|
699
|
+
# Normalize requirements field (prefer None over empty string)
|
|
700
|
+
normalized_requirements = requirements if requirements else None
|
|
701
|
+
|
|
702
|
+
# Create manifest ID for the HAS_DEP relationship
|
|
703
|
+
manifest_id = f"{repo_url}#{manifest_path}"
|
|
704
|
+
|
|
705
|
+
out_dependencies_list.append(
|
|
706
|
+
{
|
|
707
|
+
"id": dependency_id,
|
|
708
|
+
"name": canonical_name,
|
|
709
|
+
"original_name": package_name, # Keep original for reference
|
|
710
|
+
"requirements": normalized_requirements,
|
|
711
|
+
"ecosystem": ecosystem,
|
|
712
|
+
"package_manager": package_manager,
|
|
713
|
+
"manifest_path": manifest_path,
|
|
714
|
+
"manifest_id": manifest_id,
|
|
715
|
+
"repo_url": repo_url,
|
|
716
|
+
"manifest_file": (
|
|
717
|
+
manifest_path.split("/")[-1] if manifest_path else ""
|
|
718
|
+
),
|
|
719
|
+
}
|
|
720
|
+
)
|
|
721
|
+
dependencies_added += 1
|
|
722
|
+
|
|
723
|
+
if dependencies_added > 0:
|
|
724
|
+
repo_name = repo_url.split("/")[-1] if repo_url else "repository"
|
|
725
|
+
logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
|
|
729
|
+
"""
|
|
730
|
+
Canonicalize dependency names based on ecosystem conventions.
|
|
731
|
+
"""
|
|
732
|
+
if not name:
|
|
733
|
+
return name
|
|
734
|
+
|
|
735
|
+
# For Python packages, use existing canonicalization
|
|
736
|
+
if package_manager in ["PIP", "CONDA"]:
|
|
737
|
+
try:
|
|
738
|
+
from packaging.utils import canonicalize_name
|
|
739
|
+
|
|
740
|
+
return str(canonicalize_name(name))
|
|
741
|
+
except ImportError:
|
|
742
|
+
# Fallback if packaging not available
|
|
743
|
+
return name.lower().replace("_", "-")
|
|
744
|
+
|
|
745
|
+
# For other ecosystems, use lowercase
|
|
746
|
+
return name.lower()
|
|
747
|
+
|
|
748
|
+
|
|
536
749
|
def _transform_python_requirements(
|
|
537
750
|
requirements_list: List[str],
|
|
538
751
|
repo_url: str,
|
|
@@ -667,11 +880,15 @@ def load_github_repos(
|
|
|
667
880
|
ON CREATE SET r.firstseen = timestamp()
|
|
668
881
|
SET r.lastupdated = r.UpdateTag
|
|
669
882
|
"""
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
883
|
+
|
|
884
|
+
def _ingest_repos_tx(tx: neo4j.Transaction) -> None:
|
|
885
|
+
tx.run(
|
|
886
|
+
ingest_repo,
|
|
887
|
+
RepoData=repo_data,
|
|
888
|
+
UpdateTag=update_tag,
|
|
889
|
+
).consume()
|
|
890
|
+
|
|
891
|
+
execute_write_with_retry(neo4j_session, _ingest_repos_tx)
|
|
675
892
|
|
|
676
893
|
|
|
677
894
|
@timeit
|
|
@@ -701,11 +918,14 @@ def load_github_languages(
|
|
|
701
918
|
ON CREATE SET r.firstseen = timestamp()
|
|
702
919
|
SET r.lastupdated = $UpdateTag"""
|
|
703
920
|
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
921
|
+
def _ingest_languages_tx(tx: neo4j.Transaction) -> None:
|
|
922
|
+
tx.run(
|
|
923
|
+
ingest_languages,
|
|
924
|
+
Languages=repo_languages,
|
|
925
|
+
UpdateTag=update_tag,
|
|
926
|
+
).consume()
|
|
927
|
+
|
|
928
|
+
execute_write_with_retry(neo4j_session, _ingest_languages_tx)
|
|
709
929
|
|
|
710
930
|
|
|
711
931
|
@timeit
|
|
@@ -721,31 +941,43 @@ def load_github_owners(
|
|
|
721
941
|
:param repo_owners: list of owner to repo mappings
|
|
722
942
|
:return: Nothing
|
|
723
943
|
"""
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
)
|
|
944
|
+
ingest_owner_template = Template(
|
|
945
|
+
"""
|
|
946
|
+
MERGE (user:$account_type{id: $Id})
|
|
947
|
+
ON CREATE SET user.firstseen = timestamp()
|
|
948
|
+
SET user.username = $UserName,
|
|
949
|
+
user.lastupdated = $UpdateTag
|
|
950
|
+
WITH user
|
|
951
|
+
|
|
952
|
+
MATCH (repo:GitHubRepository{id: $RepoId})
|
|
953
|
+
MERGE (user)<-[r:OWNER]-(repo)
|
|
954
|
+
ON CREATE SET r.firstseen = timestamp()
|
|
955
|
+
SET r.lastupdated = $UpdateTag""",
|
|
956
|
+
)
|
|
738
957
|
|
|
739
|
-
|
|
958
|
+
account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
|
|
740
959
|
|
|
741
|
-
|
|
960
|
+
def _ingest_owner_tx(
|
|
961
|
+
tx: neo4j.Transaction,
|
|
962
|
+
owner_record: Dict,
|
|
963
|
+
owner_label: str,
|
|
964
|
+
) -> None:
|
|
965
|
+
tx.run(
|
|
742
966
|
ingest_owner_template.safe_substitute(
|
|
743
|
-
account_type=
|
|
967
|
+
account_type=owner_label,
|
|
744
968
|
),
|
|
745
|
-
Id=
|
|
746
|
-
UserName=
|
|
747
|
-
RepoId=
|
|
969
|
+
Id=owner_record["owner_id"],
|
|
970
|
+
UserName=owner_record["owner"],
|
|
971
|
+
RepoId=owner_record["repo_id"],
|
|
748
972
|
UpdateTag=update_tag,
|
|
973
|
+
).consume()
|
|
974
|
+
|
|
975
|
+
for owner in repo_owners:
|
|
976
|
+
execute_write_with_retry(
|
|
977
|
+
neo4j_session,
|
|
978
|
+
_ingest_owner_tx,
|
|
979
|
+
owner,
|
|
980
|
+
account_type[owner["type"]],
|
|
749
981
|
)
|
|
750
982
|
|
|
751
983
|
|
|
@@ -776,12 +1008,159 @@ def load_collaborators(
|
|
|
776
1008
|
SET o.lastupdated = $UpdateTag
|
|
777
1009
|
""",
|
|
778
1010
|
)
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
1011
|
+
|
|
1012
|
+
def _ingest_collaborators_tx(
|
|
1013
|
+
tx: neo4j.Transaction,
|
|
1014
|
+
relationship_label: str,
|
|
1015
|
+
collaborator_data: List[Dict],
|
|
1016
|
+
) -> None:
|
|
1017
|
+
tx.run(
|
|
782
1018
|
query.safe_substitute(rel_label=relationship_label),
|
|
783
|
-
UserData=
|
|
1019
|
+
UserData=collaborator_data,
|
|
784
1020
|
UpdateTag=update_tag,
|
|
1021
|
+
).consume()
|
|
1022
|
+
|
|
1023
|
+
for collab_type, collab_data in collaborators.items():
|
|
1024
|
+
relationship_label = f"{affiliation}_COLLAB_{collab_type}"
|
|
1025
|
+
execute_write_with_retry(
|
|
1026
|
+
neo4j_session,
|
|
1027
|
+
_ingest_collaborators_tx,
|
|
1028
|
+
relationship_label,
|
|
1029
|
+
collab_data,
|
|
1030
|
+
)
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
@timeit
|
|
1034
|
+
def load_python_requirements(
|
|
1035
|
+
neo4j_session: neo4j.Session,
|
|
1036
|
+
update_tag: int,
|
|
1037
|
+
requirements_objects: List[Dict],
|
|
1038
|
+
) -> None:
|
|
1039
|
+
query = """
|
|
1040
|
+
UNWIND $Requirements AS req
|
|
1041
|
+
MERGE (lib:PythonLibrary:Dependency{id: req.id})
|
|
1042
|
+
ON CREATE SET lib.firstseen = timestamp(),
|
|
1043
|
+
lib.name = req.name
|
|
1044
|
+
SET lib.lastupdated = $UpdateTag,
|
|
1045
|
+
lib.version = req.version
|
|
1046
|
+
|
|
1047
|
+
WITH lib, req
|
|
1048
|
+
MATCH (repo:GitHubRepository{id: req.repo_url})
|
|
1049
|
+
MERGE (repo)-[r:REQUIRES]->(lib)
|
|
1050
|
+
ON CREATE SET r.firstseen = timestamp()
|
|
1051
|
+
SET r.lastupdated = $UpdateTag,
|
|
1052
|
+
r.specifier = req.specifier
|
|
1053
|
+
"""
|
|
1054
|
+
|
|
1055
|
+
def _ingest_requirements_tx(tx: neo4j.Transaction) -> None:
|
|
1056
|
+
tx.run(
|
|
1057
|
+
query,
|
|
1058
|
+
Requirements=requirements_objects,
|
|
1059
|
+
UpdateTag=update_tag,
|
|
1060
|
+
).consume()
|
|
1061
|
+
|
|
1062
|
+
execute_write_with_retry(neo4j_session, _ingest_requirements_tx)
|
|
1063
|
+
|
|
1064
|
+
|
|
1065
|
+
@timeit
|
|
1066
|
+
def load_github_dependencies(
|
|
1067
|
+
neo4j_session: neo4j.Session,
|
|
1068
|
+
update_tag: int,
|
|
1069
|
+
dependencies: List[Dict],
|
|
1070
|
+
) -> None:
|
|
1071
|
+
"""
|
|
1072
|
+
Ingest GitHub dependency data into Neo4j using the new data model
|
|
1073
|
+
:param neo4j_session: Neo4J session object for server communication
|
|
1074
|
+
:param update_tag: Timestamp used to determine data freshness
|
|
1075
|
+
:param dependencies: List of dependency objects from GitHub's dependency graph
|
|
1076
|
+
:return: Nothing
|
|
1077
|
+
"""
|
|
1078
|
+
# Group dependencies by both repo_url and manifest_id for schema-based loading
|
|
1079
|
+
dependencies_by_repo_and_manifest = defaultdict(list)
|
|
1080
|
+
|
|
1081
|
+
for dep in dependencies:
|
|
1082
|
+
repo_url = dep["repo_url"]
|
|
1083
|
+
manifest_id = dep["manifest_id"]
|
|
1084
|
+
# Create a key combining both repo_url and manifest_id
|
|
1085
|
+
group_key = (repo_url, manifest_id)
|
|
1086
|
+
# Remove repo_url and manifest_id from the dependency object since we'll pass them as kwargs
|
|
1087
|
+
dep_without_kwargs = {
|
|
1088
|
+
k: v for k, v in dep.items() if k not in ["repo_url", "manifest_id"]
|
|
1089
|
+
}
|
|
1090
|
+
dependencies_by_repo_and_manifest[group_key].append(dep_without_kwargs)
|
|
1091
|
+
|
|
1092
|
+
# Load dependencies for each repository/manifest combination separately
|
|
1093
|
+
for (
|
|
1094
|
+
repo_url,
|
|
1095
|
+
manifest_id,
|
|
1096
|
+
), group_dependencies in dependencies_by_repo_and_manifest.items():
|
|
1097
|
+
load_data(
|
|
1098
|
+
neo4j_session,
|
|
1099
|
+
GitHubDependencySchema(),
|
|
1100
|
+
group_dependencies,
|
|
1101
|
+
lastupdated=update_tag,
|
|
1102
|
+
repo_url=repo_url,
|
|
1103
|
+
manifest_id=manifest_id,
|
|
1104
|
+
)
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
@timeit
|
|
1108
|
+
def load_github_dependency_manifests(
|
|
1109
|
+
neo4j_session: neo4j.Session,
|
|
1110
|
+
update_tag: int,
|
|
1111
|
+
manifests: List[Dict],
|
|
1112
|
+
) -> None:
|
|
1113
|
+
"""
|
|
1114
|
+
Ingest GitHub dependency manifests into Neo4j
|
|
1115
|
+
"""
|
|
1116
|
+
manifests_by_repo = defaultdict(list)
|
|
1117
|
+
|
|
1118
|
+
for manifest in manifests:
|
|
1119
|
+
repo_url = manifest["repo_url"]
|
|
1120
|
+
manifests_by_repo[repo_url].append(manifest)
|
|
1121
|
+
|
|
1122
|
+
# Load manifests for each repository separately
|
|
1123
|
+
for repo_url, repo_manifests in manifests_by_repo.items():
|
|
1124
|
+
load_data(
|
|
1125
|
+
neo4j_session,
|
|
1126
|
+
DependencyGraphManifestSchema(),
|
|
1127
|
+
repo_manifests,
|
|
1128
|
+
lastupdated=update_tag,
|
|
1129
|
+
repo_url=repo_url,
|
|
1130
|
+
)
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
@timeit
|
|
1134
|
+
def cleanup_github_dependencies(
|
|
1135
|
+
neo4j_session: neo4j.Session,
|
|
1136
|
+
common_job_parameters: Dict[str, Any],
|
|
1137
|
+
repo_urls: List[str],
|
|
1138
|
+
) -> None:
|
|
1139
|
+
# Run cleanup for each repository separately
|
|
1140
|
+
for repo_url in repo_urls:
|
|
1141
|
+
cleanup_params = {**common_job_parameters, "repo_url": repo_url}
|
|
1142
|
+
GraphJob.from_node_schema(GitHubDependencySchema(), cleanup_params).run(
|
|
1143
|
+
neo4j_session
|
|
1144
|
+
)
|
|
1145
|
+
|
|
1146
|
+
|
|
1147
|
+
@timeit
|
|
1148
|
+
def cleanup_github_manifests(
|
|
1149
|
+
neo4j_session: neo4j.Session,
|
|
1150
|
+
common_job_parameters: Dict[str, Any],
|
|
1151
|
+
repo_urls: List[str],
|
|
1152
|
+
) -> None:
|
|
1153
|
+
"""
|
|
1154
|
+
Delete GitHub dependency manifests and their relationships from the graph if they were not updated in the last sync.
|
|
1155
|
+
:param neo4j_session: Neo4j session
|
|
1156
|
+
:param common_job_parameters: Common job parameters containing UPDATE_TAG
|
|
1157
|
+
:param repo_urls: List of repository URLs to clean up manifests for
|
|
1158
|
+
"""
|
|
1159
|
+
# Run cleanup for each repository separately
|
|
1160
|
+
for repo_url in repo_urls:
|
|
1161
|
+
cleanup_params = {**common_job_parameters, "repo_url": repo_url}
|
|
1162
|
+
GraphJob.from_node_schema(DependencyGraphManifestSchema(), cleanup_params).run(
|
|
1163
|
+
neo4j_session
|
|
785
1164
|
)
|
|
786
1165
|
|
|
787
1166
|
|
|
@@ -823,33 +1202,15 @@ def load(
|
|
|
823
1202
|
common_job_parameters["UPDATE_TAG"],
|
|
824
1203
|
repo_data["python_requirements"],
|
|
825
1204
|
)
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
UNWIND $Requirements AS req
|
|
836
|
-
MERGE (lib:PythonLibrary:Dependency{id: req.id})
|
|
837
|
-
ON CREATE SET lib.firstseen = timestamp(),
|
|
838
|
-
lib.name = req.name
|
|
839
|
-
SET lib.lastupdated = $UpdateTag,
|
|
840
|
-
lib.version = req.version
|
|
841
|
-
|
|
842
|
-
WITH lib, req
|
|
843
|
-
MATCH (repo:GitHubRepository{id: req.repo_url})
|
|
844
|
-
MERGE (repo)-[r:REQUIRES]->(lib)
|
|
845
|
-
ON CREATE SET r.firstseen = timestamp()
|
|
846
|
-
SET r.lastupdated = $UpdateTag,
|
|
847
|
-
r.specifier = req.specifier
|
|
848
|
-
"""
|
|
849
|
-
neo4j_session.run(
|
|
850
|
-
query,
|
|
851
|
-
Requirements=requirements_objects,
|
|
852
|
-
UpdateTag=update_tag,
|
|
1205
|
+
load_github_dependency_manifests(
|
|
1206
|
+
neo4j_session,
|
|
1207
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1208
|
+
repo_data["manifests"],
|
|
1209
|
+
)
|
|
1210
|
+
load_github_dependencies(
|
|
1211
|
+
neo4j_session,
|
|
1212
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1213
|
+
repo_data["dependencies"],
|
|
853
1214
|
)
|
|
854
1215
|
|
|
855
1216
|
|
|
@@ -896,4 +1257,21 @@ def sync(
|
|
|
896
1257
|
)
|
|
897
1258
|
repo_data = transform(repos_json, direct_collabs, outside_collabs)
|
|
898
1259
|
load(neo4j_session, common_job_parameters, repo_data)
|
|
1260
|
+
|
|
1261
|
+
# Collect repository URLs that have dependencies for cleanup
|
|
1262
|
+
repo_urls_with_dependencies = list(
|
|
1263
|
+
{dep["repo_url"] for dep in repo_data["dependencies"]}
|
|
1264
|
+
)
|
|
1265
|
+
cleanup_github_dependencies(
|
|
1266
|
+
neo4j_session, common_job_parameters, repo_urls_with_dependencies
|
|
1267
|
+
)
|
|
1268
|
+
|
|
1269
|
+
# Collect repository URLs that have manifests for cleanup
|
|
1270
|
+
repo_urls_with_manifests = list(
|
|
1271
|
+
{manifest["repo_url"] for manifest in repo_data["manifests"]}
|
|
1272
|
+
)
|
|
1273
|
+
cleanup_github_manifests(
|
|
1274
|
+
neo4j_session, common_job_parameters, repo_urls_with_manifests
|
|
1275
|
+
)
|
|
1276
|
+
|
|
899
1277
|
run_cleanup_job("github_repos_cleanup.json", neo4j_session, common_job_parameters)
|