cartography 0.93.0rc1__py3-none-any.whl → 0.123.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartography/__main__.py +1 -2
- cartography/_version.py +34 -0
- cartography/cli.py +903 -225
- cartography/client/aws/__init__.py +19 -0
- cartography/client/aws/ecr.py +51 -0
- cartography/client/core/tx.py +400 -27
- cartography/config.py +215 -10
- cartography/data/azure_permission_relationships.yaml +20 -0
- cartography/data/gcp_permission_relationships.yaml +21 -0
- cartography/data/indexes.cypher +1 -200
- cartography/data/jobs/analysis/aws_ec2_asset_exposure.json +17 -2
- cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
- cartography/data/jobs/analysis/gcp_compute_asset_inet_exposure.json +1 -1
- cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
- cartography/data/jobs/cleanup/crowdstrike_import_cleanup.json +0 -5
- cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
- cartography/data/jobs/cleanup/github_repos_cleanup.json +27 -0
- cartography/data/jobs/scoped_analysis/aws_ec2_iaminstanceprofile.json +15 -0
- cartography/data/jobs/scoped_analysis/semgrep_sca_risk_analysis.json +13 -13
- cartography/driftdetect/__main__.py +1 -2
- cartography/driftdetect/add_shortcut.py +10 -2
- cartography/driftdetect/cli.py +72 -75
- cartography/driftdetect/detect_deviations.py +7 -3
- cartography/driftdetect/get_states.py +20 -8
- cartography/driftdetect/model.py +5 -5
- cartography/driftdetect/serializers.py +8 -6
- cartography/driftdetect/storage.py +2 -2
- cartography/graph/cleanupbuilder.py +255 -35
- cartography/graph/job.py +104 -20
- cartography/graph/querybuilder.py +689 -91
- cartography/graph/statement.py +49 -36
- cartography/intel/airbyte/__init__.py +105 -0
- cartography/intel/airbyte/connections.py +120 -0
- cartography/intel/airbyte/destinations.py +81 -0
- cartography/intel/airbyte/organizations.py +59 -0
- cartography/intel/airbyte/sources.py +78 -0
- cartography/intel/airbyte/tags.py +64 -0
- cartography/intel/airbyte/users.py +106 -0
- cartography/intel/airbyte/util.py +122 -0
- cartography/intel/airbyte/workspaces.py +63 -0
- cartography/intel/analysis.py +4 -1
- cartography/intel/anthropic/__init__.py +62 -0
- cartography/intel/anthropic/apikeys.py +72 -0
- cartography/intel/anthropic/users.py +75 -0
- cartography/intel/anthropic/util.py +51 -0
- cartography/intel/anthropic/workspaces.py +95 -0
- cartography/intel/aws/__init__.py +137 -59
- cartography/intel/aws/acm.py +124 -0
- cartography/intel/aws/apigateway.py +482 -217
- cartography/intel/aws/apigatewayv2.py +116 -0
- cartography/intel/aws/cloudtrail.py +105 -0
- cartography/intel/aws/cloudtrail_management_events.py +962 -0
- cartography/intel/aws/cloudwatch.py +239 -0
- cartography/intel/aws/codebuild.py +132 -0
- cartography/intel/aws/cognito.py +201 -0
- cartography/intel/aws/config.py +63 -23
- cartography/intel/aws/dynamodb.py +108 -40
- cartography/intel/aws/ec2/__init__.py +2 -2
- cartography/intel/aws/ec2/auto_scaling_groups.py +254 -189
- cartography/intel/aws/ec2/elastic_ip_addresses.py +44 -14
- cartography/intel/aws/ec2/images.py +74 -39
- cartography/intel/aws/ec2/instances.py +262 -137
- cartography/intel/aws/ec2/internet_gateways.py +44 -13
- cartography/intel/aws/ec2/key_pairs.py +72 -39
- cartography/intel/aws/ec2/launch_templates.py +143 -66
- cartography/intel/aws/ec2/load_balancer_v2s.py +119 -45
- cartography/intel/aws/ec2/load_balancers.py +165 -147
- cartography/intel/aws/ec2/network_acls.py +233 -0
- cartography/intel/aws/ec2/network_interfaces.py +150 -87
- cartography/intel/aws/ec2/reserved_instances.py +48 -17
- cartography/intel/aws/ec2/route_tables.py +327 -0
- cartography/intel/aws/ec2/security_groups.py +189 -121
- cartography/intel/aws/ec2/snapshots.py +93 -91
- cartography/intel/aws/ec2/subnets.py +70 -58
- cartography/intel/aws/ec2/tgw.py +111 -39
- cartography/intel/aws/ec2/util.py +1 -1
- cartography/intel/aws/ec2/volumes.py +69 -41
- cartography/intel/aws/ec2/vpc.py +157 -116
- cartography/intel/aws/ec2/vpc_peerings.py +317 -121
- cartography/intel/aws/ecr.py +336 -93
- cartography/intel/aws/ecr_image_layers.py +923 -0
- cartography/intel/aws/ecs.py +310 -403
- cartography/intel/aws/efs.py +261 -0
- cartography/intel/aws/eks.py +55 -29
- cartography/intel/aws/elasticache.py +130 -83
- cartography/intel/aws/elasticsearch.py +70 -24
- cartography/intel/aws/emr.py +61 -23
- cartography/intel/aws/eventbridge.py +164 -0
- cartography/intel/aws/glue.py +181 -0
- cartography/intel/aws/guardduty.py +443 -0
- cartography/intel/aws/iam.py +978 -464
- cartography/intel/aws/iam_instance_profiles.py +73 -0
- cartography/intel/aws/identitycenter.py +847 -0
- cartography/intel/aws/inspector.py +330 -133
- cartography/intel/aws/kms.py +235 -209
- cartography/intel/aws/lambda_function.py +328 -176
- cartography/intel/aws/organizations.py +40 -19
- cartography/intel/aws/permission_relationships.py +144 -68
- cartography/intel/aws/rds.py +467 -412
- cartography/intel/aws/redshift.py +116 -50
- cartography/intel/aws/resourcegroupstaggingapi.py +198 -82
- cartography/intel/aws/resources.py +80 -42
- cartography/intel/aws/route53.py +419 -318
- cartography/intel/aws/s3.py +489 -96
- cartography/intel/aws/s3accountpublicaccessblock.py +157 -0
- cartography/intel/aws/secretsmanager.py +217 -40
- cartography/intel/aws/securityhub.py +23 -10
- cartography/intel/aws/sns.py +226 -0
- cartography/intel/aws/sqs.py +74 -96
- cartography/intel/aws/ssm.py +142 -33
- cartography/intel/aws/util/arns.py +7 -7
- cartography/intel/aws/util/common.py +31 -4
- cartography/intel/azure/__init__.py +259 -46
- cartography/intel/azure/aks.py +175 -0
- cartography/intel/azure/app_service.py +105 -0
- cartography/intel/azure/compute.py +141 -120
- cartography/intel/azure/container_instances.py +95 -0
- cartography/intel/azure/cosmosdb.py +706 -519
- cartography/intel/azure/data_factory.py +85 -0
- cartography/intel/azure/data_factory_dataset.py +128 -0
- cartography/intel/azure/data_factory_linked_service.py +119 -0
- cartography/intel/azure/data_factory_pipeline.py +142 -0
- cartography/intel/azure/data_lake.py +124 -0
- cartography/intel/azure/event_grid.py +94 -0
- cartography/intel/azure/functions.py +124 -0
- cartography/intel/azure/load_balancers.py +263 -0
- cartography/intel/azure/logic_apps.py +101 -0
- cartography/intel/azure/monitor.py +105 -0
- cartography/intel/azure/network.py +467 -0
- cartography/intel/azure/permission_relationships.py +466 -0
- cartography/intel/azure/rbac.py +309 -0
- cartography/intel/azure/resource_groups.py +82 -0
- cartography/intel/azure/security_center.py +106 -0
- cartography/intel/azure/sql.py +436 -392
- cartography/intel/azure/storage.py +467 -335
- cartography/intel/azure/subscription.py +49 -55
- cartography/intel/azure/tenant.py +46 -28
- cartography/intel/azure/util/common.py +13 -0
- cartography/intel/azure/util/credentials.py +58 -143
- cartography/intel/azure/util/tag.py +41 -0
- cartography/intel/bigfix/__init__.py +2 -2
- cartography/intel/bigfix/computers.py +93 -65
- cartography/intel/cloudflare/__init__.py +74 -0
- cartography/intel/cloudflare/accounts.py +57 -0
- cartography/intel/cloudflare/dnsrecords.py +64 -0
- cartography/intel/cloudflare/members.py +75 -0
- cartography/intel/cloudflare/roles.py +65 -0
- cartography/intel/cloudflare/zones.py +64 -0
- cartography/intel/create_indexes.py +5 -3
- cartography/intel/crowdstrike/__init__.py +26 -12
- cartography/intel/crowdstrike/endpoints.py +17 -45
- cartography/intel/crowdstrike/spotlight.py +13 -5
- cartography/intel/cve/__init__.py +91 -26
- cartography/intel/cve/feed.py +77 -56
- cartography/intel/digitalocean/__init__.py +22 -13
- cartography/intel/digitalocean/compute.py +75 -108
- cartography/intel/digitalocean/management.py +44 -80
- cartography/intel/digitalocean/platform.py +48 -43
- cartography/intel/dns.py +41 -12
- cartography/intel/duo/__init__.py +21 -16
- cartography/intel/duo/api_host.py +14 -9
- cartography/intel/duo/endpoints.py +50 -45
- cartography/intel/duo/groups.py +18 -14
- cartography/intel/duo/phones.py +37 -34
- cartography/intel/duo/tokens.py +26 -23
- cartography/intel/duo/users.py +54 -50
- cartography/intel/duo/web_authn_credentials.py +30 -25
- cartography/intel/entra/__init__.py +160 -0
- cartography/intel/entra/app_role_assignments.py +284 -0
- cartography/intel/entra/applications.py +182 -0
- cartography/intel/entra/federation/__init__.py +0 -0
- cartography/intel/entra/federation/aws_identity_center.py +77 -0
- cartography/intel/entra/groups.py +198 -0
- cartography/intel/entra/ou.py +136 -0
- cartography/intel/entra/service_principals.py +217 -0
- cartography/intel/entra/users.py +259 -0
- cartography/intel/gcp/__init__.py +381 -175
- cartography/intel/gcp/bigtable_app_profile.py +101 -0
- cartography/intel/gcp/bigtable_backup.py +91 -0
- cartography/intel/gcp/bigtable_cluster.py +93 -0
- cartography/intel/gcp/bigtable_instance.py +86 -0
- cartography/intel/gcp/bigtable_table.py +87 -0
- cartography/intel/gcp/cai.py +292 -0
- cartography/intel/gcp/clients.py +112 -0
- cartography/intel/gcp/compute.py +521 -325
- cartography/intel/gcp/crm/__init__.py +0 -0
- cartography/intel/gcp/crm/folders.py +114 -0
- cartography/intel/gcp/crm/orgs.py +70 -0
- cartography/intel/gcp/crm/projects.py +120 -0
- cartography/intel/gcp/dns.py +134 -179
- cartography/intel/gcp/gke.py +100 -107
- cartography/intel/gcp/iam.py +262 -0
- cartography/intel/gcp/permission_relationships.py +394 -0
- cartography/intel/gcp/policy_bindings.py +225 -0
- cartography/intel/gcp/storage.py +103 -158
- cartography/intel/github/__init__.py +66 -27
- cartography/intel/github/commits.py +423 -0
- cartography/intel/github/repos.py +871 -160
- cartography/intel/github/teams.py +386 -53
- cartography/intel/github/users.py +214 -49
- cartography/intel/github/util.py +50 -35
- cartography/intel/googleworkspace/__init__.py +193 -0
- cartography/intel/googleworkspace/devices.py +254 -0
- cartography/intel/googleworkspace/groups.py +568 -0
- cartography/intel/googleworkspace/oauth_apps.py +259 -0
- cartography/intel/googleworkspace/tenant.py +85 -0
- cartography/intel/googleworkspace/users.py +138 -0
- cartography/intel/gsuite/__init__.py +101 -42
- cartography/intel/gsuite/groups.py +291 -0
- cartography/intel/gsuite/users.py +142 -0
- cartography/intel/jamf/__init__.py +19 -1
- cartography/intel/jamf/computers.py +37 -8
- cartography/intel/jamf/util.py +7 -2
- cartography/intel/kandji/__init__.py +6 -3
- cartography/intel/kandji/devices.py +40 -10
- cartography/intel/keycloak/__init__.py +153 -0
- cartography/intel/keycloak/authenticationexecutions.py +322 -0
- cartography/intel/keycloak/authenticationflows.py +77 -0
- cartography/intel/keycloak/clients.py +187 -0
- cartography/intel/keycloak/groups.py +126 -0
- cartography/intel/keycloak/identityproviders.py +94 -0
- cartography/intel/keycloak/organizations.py +163 -0
- cartography/intel/keycloak/realms.py +61 -0
- cartography/intel/keycloak/roles.py +202 -0
- cartography/intel/keycloak/scopes.py +73 -0
- cartography/intel/keycloak/users.py +70 -0
- cartography/intel/keycloak/util.py +47 -0
- cartography/intel/kubernetes/__init__.py +60 -14
- cartography/intel/kubernetes/clusters.py +86 -0
- cartography/intel/kubernetes/eks.py +402 -0
- cartography/intel/kubernetes/namespaces.py +60 -55
- cartography/intel/kubernetes/pods.py +171 -75
- cartography/intel/kubernetes/rbac.py +597 -0
- cartography/intel/kubernetes/secrets.py +95 -45
- cartography/intel/kubernetes/services.py +131 -63
- cartography/intel/kubernetes/util.py +142 -14
- cartography/intel/lastpass/__init__.py +2 -2
- cartography/intel/lastpass/users.py +23 -12
- cartography/intel/oci/__init__.py +44 -11
- cartography/intel/oci/iam.py +157 -47
- cartography/intel/oci/organizations.py +16 -7
- cartography/intel/oci/utils.py +71 -25
- cartography/intel/okta/__init__.py +66 -15
- cartography/intel/okta/applications.py +57 -25
- cartography/intel/okta/awssaml.py +105 -41
- cartography/intel/okta/factors.py +19 -5
- cartography/intel/okta/groups.py +61 -31
- cartography/intel/okta/organization.py +8 -2
- cartography/intel/okta/origins.py +9 -3
- cartography/intel/okta/roles.py +20 -7
- cartography/intel/okta/users.py +31 -10
- cartography/intel/okta/utils.py +6 -4
- cartography/intel/ontology/__init__.py +44 -0
- cartography/intel/ontology/devices.py +54 -0
- cartography/intel/ontology/users.py +54 -0
- cartography/intel/ontology/utils.py +176 -0
- cartography/intel/openai/__init__.py +86 -0
- cartography/intel/openai/adminapikeys.py +89 -0
- cartography/intel/openai/apikeys.py +96 -0
- cartography/intel/openai/projects.py +97 -0
- cartography/intel/openai/serviceaccounts.py +82 -0
- cartography/intel/openai/users.py +75 -0
- cartography/intel/openai/util.py +45 -0
- cartography/intel/pagerduty/__init__.py +8 -7
- cartography/intel/pagerduty/escalation_policies.py +31 -12
- cartography/intel/pagerduty/schedules.py +21 -8
- cartography/intel/pagerduty/services.py +18 -7
- cartography/intel/pagerduty/teams.py +13 -5
- cartography/intel/pagerduty/users.py +6 -2
- cartography/intel/pagerduty/vendors.py +6 -2
- cartography/intel/scaleway/__init__.py +127 -0
- cartography/intel/scaleway/iam/__init__.py +0 -0
- cartography/intel/scaleway/iam/apikeys.py +71 -0
- cartography/intel/scaleway/iam/applications.py +71 -0
- cartography/intel/scaleway/iam/groups.py +71 -0
- cartography/intel/scaleway/iam/users.py +71 -0
- cartography/intel/scaleway/instances/__init__.py +0 -0
- cartography/intel/scaleway/instances/flexibleips.py +86 -0
- cartography/intel/scaleway/instances/instances.py +92 -0
- cartography/intel/scaleway/projects.py +79 -0
- cartography/intel/scaleway/storage/__init__.py +0 -0
- cartography/intel/scaleway/storage/snapshots.py +86 -0
- cartography/intel/scaleway/storage/volumes.py +84 -0
- cartography/intel/scaleway/utils.py +37 -0
- cartography/intel/semgrep/__init__.py +30 -5
- cartography/intel/semgrep/dependencies.py +255 -0
- cartography/intel/semgrep/deployment.py +69 -0
- cartography/intel/semgrep/findings.py +157 -117
- cartography/intel/sentinelone/__init__.py +75 -0
- cartography/intel/sentinelone/account.py +140 -0
- cartography/intel/sentinelone/agent.py +139 -0
- cartography/intel/sentinelone/api.py +124 -0
- cartography/intel/sentinelone/application.py +248 -0
- cartography/intel/sentinelone/cve.py +119 -0
- cartography/intel/sentinelone/utils.py +28 -0
- cartography/intel/slack/__init__.py +78 -0
- cartography/intel/slack/channels.py +80 -0
- cartography/intel/slack/groups.py +90 -0
- cartography/intel/slack/teams.py +65 -0
- cartography/intel/slack/users.py +57 -0
- cartography/intel/slack/utils.py +29 -0
- cartography/intel/snipeit/__init__.py +44 -0
- cartography/intel/snipeit/asset.py +80 -0
- cartography/intel/snipeit/user.py +78 -0
- cartography/intel/snipeit/util.py +40 -0
- cartography/intel/spacelift/__init__.py +161 -0
- cartography/intel/spacelift/account.py +73 -0
- cartography/intel/spacelift/ec2_ownership.py +280 -0
- cartography/intel/spacelift/runs.py +463 -0
- cartography/intel/spacelift/spaces.py +112 -0
- cartography/intel/spacelift/stacks.py +119 -0
- cartography/intel/spacelift/util.py +122 -0
- cartography/intel/spacelift/workerpools.py +131 -0
- cartography/intel/spacelift/workers.py +128 -0
- cartography/intel/tailscale/__init__.py +77 -0
- cartography/intel/tailscale/acls.py +146 -0
- cartography/intel/tailscale/devices.py +127 -0
- cartography/intel/tailscale/postureintegrations.py +81 -0
- cartography/intel/tailscale/tailnets.py +76 -0
- cartography/intel/tailscale/users.py +80 -0
- cartography/intel/tailscale/utils.py +132 -0
- cartography/intel/trivy/__init__.py +272 -0
- cartography/intel/trivy/scanner.py +386 -0
- cartography/models/airbyte/__init__.py +0 -0
- cartography/models/airbyte/connection.py +138 -0
- cartography/models/airbyte/destination.py +75 -0
- cartography/models/airbyte/organization.py +19 -0
- cartography/models/airbyte/source.py +75 -0
- cartography/models/airbyte/stream.py +74 -0
- cartography/models/airbyte/tag.py +69 -0
- cartography/models/airbyte/user.py +115 -0
- cartography/models/airbyte/workspace.py +46 -0
- cartography/models/anthropic/__init__.py +0 -0
- cartography/models/anthropic/apikey.py +94 -0
- cartography/models/anthropic/organization.py +19 -0
- cartography/models/anthropic/user.py +52 -0
- cartography/models/anthropic/workspace.py +90 -0
- cartography/models/aws/acm/__init__.py +0 -0
- cartography/models/aws/acm/certificate.py +75 -0
- cartography/models/aws/apigateway/__init__.py +0 -0
- cartography/models/aws/apigateway/apigateway.py +51 -0
- cartography/models/aws/apigateway/apigatewaycertificate.py +72 -0
- cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
- cartography/models/aws/apigateway/apigatewayintegration.py +79 -0
- cartography/models/aws/apigateway/apigatewaymethod.py +74 -0
- cartography/models/aws/apigateway/apigatewayresource.py +70 -0
- cartography/models/aws/apigateway/apigatewaystage.py +75 -0
- cartography/models/aws/apigatewayv2/__init__.py +0 -0
- cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
- cartography/models/aws/cloudtrail/__init__.py +0 -0
- cartography/models/aws/cloudtrail/management_events.py +153 -0
- cartography/models/aws/cloudtrail/trail.py +106 -0
- cartography/models/aws/cloudwatch/__init__.py +0 -0
- cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
- cartography/models/aws/cloudwatch/loggroup.py +52 -0
- cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
- cartography/models/aws/codebuild/__init__.py +0 -0
- cartography/models/aws/codebuild/project.py +49 -0
- cartography/models/aws/cognito/__init__.py +0 -0
- cartography/models/aws/cognito/identity_pool.py +70 -0
- cartography/models/aws/cognito/user_pool.py +47 -0
- cartography/models/aws/dynamodb/gsi.py +30 -22
- cartography/models/aws/dynamodb/tables.py +27 -17
- cartography/models/aws/ec2/auto_scaling_groups.py +224 -0
- cartography/models/aws/ec2/images.py +36 -34
- cartography/models/aws/ec2/instances.py +85 -38
- cartography/models/aws/ec2/keypair.py +59 -0
- cartography/models/aws/ec2/keypair_instance.py +76 -0
- cartography/models/aws/ec2/launch_configurations.py +59 -0
- cartography/models/aws/ec2/launch_template_versions.py +48 -38
- cartography/models/aws/ec2/launch_templates.py +21 -17
- cartography/models/aws/ec2/load_balancer_listeners.py +72 -0
- cartography/models/aws/ec2/load_balancers.py +112 -0
- cartography/models/aws/ec2/network_acl_rules.py +106 -0
- cartography/models/aws/ec2/network_acls.py +95 -0
- cartography/models/aws/ec2/networkinterface_instance.py +52 -39
- cartography/models/aws/ec2/networkinterfaces.py +57 -37
- cartography/models/aws/ec2/privateip_networkinterface.py +32 -22
- cartography/models/aws/ec2/reservations.py +18 -14
- cartography/models/aws/ec2/route_table_associations.py +97 -0
- cartography/models/aws/ec2/route_tables.py +128 -0
- cartography/models/aws/ec2/routes.py +85 -0
- cartography/models/aws/ec2/security_group_rules.py +109 -0
- cartography/models/aws/ec2/security_groups.py +90 -0
- cartography/models/aws/ec2/securitygroup_instance.py +29 -20
- cartography/models/aws/ec2/securitygroup_networkinterface.py +24 -15
- cartography/models/aws/ec2/snapshots.py +58 -0
- cartography/models/aws/ec2/subnet_instance.py +26 -19
- cartography/models/aws/ec2/subnet_networkinterface.py +42 -31
- cartography/models/aws/ec2/subnets.py +65 -0
- cartography/models/aws/ec2/volumes.py +67 -40
- cartography/models/aws/ec2/vpc.py +46 -0
- cartography/models/aws/ec2/vpc_cidr.py +102 -0
- cartography/models/aws/ec2/vpc_peering.py +157 -0
- cartography/models/aws/ecr/__init__.py +0 -0
- cartography/models/aws/ecr/image.py +146 -0
- cartography/models/aws/ecr/image_layer.py +107 -0
- cartography/models/aws/ecr/repository.py +72 -0
- cartography/models/aws/ecr/repository_image.py +95 -0
- cartography/models/aws/ecs/__init__.py +0 -0
- cartography/models/aws/ecs/clusters.py +64 -0
- cartography/models/aws/ecs/container_definitions.py +93 -0
- cartography/models/aws/ecs/container_instances.py +84 -0
- cartography/models/aws/ecs/containers.py +101 -0
- cartography/models/aws/ecs/services.py +134 -0
- cartography/models/aws/ecs/task_definitions.py +135 -0
- cartography/models/aws/ecs/tasks.py +134 -0
- cartography/models/aws/efs/__init__.py +0 -0
- cartography/models/aws/efs/access_point.py +77 -0
- cartography/models/aws/efs/file_system.py +60 -0
- cartography/models/aws/efs/mount_target.py +79 -0
- cartography/models/aws/eks/clusters.py +23 -21
- cartography/models/aws/elasticache/__init__.py +0 -0
- cartography/models/aws/elasticache/cluster.py +65 -0
- cartography/models/aws/elasticache/topic.py +67 -0
- cartography/models/aws/emr.py +32 -30
- cartography/models/aws/eventbridge/__init__.py +0 -0
- cartography/models/aws/eventbridge/rule.py +77 -0
- cartography/models/aws/eventbridge/target.py +71 -0
- cartography/models/aws/glue/__init__.py +0 -0
- cartography/models/aws/glue/connection.py +51 -0
- cartography/models/aws/glue/job.py +69 -0
- cartography/models/aws/guardduty/__init__.py +1 -0
- cartography/models/aws/guardduty/detectors.py +50 -0
- cartography/models/aws/guardduty/findings.py +121 -0
- cartography/models/aws/iam/__init__.py +0 -0
- cartography/models/aws/iam/access_key.py +103 -0
- cartography/models/aws/iam/account_role.py +24 -0
- cartography/models/aws/iam/federated_principal.py +60 -0
- cartography/models/aws/iam/group.py +60 -0
- cartography/models/aws/iam/group_membership.py +27 -0
- cartography/models/aws/iam/inline_policy.py +78 -0
- cartography/models/aws/iam/instanceprofile.py +76 -0
- cartography/models/aws/iam/managed_policy.py +51 -0
- cartography/models/aws/iam/policy_statement.py +57 -0
- cartography/models/aws/iam/role.py +83 -0
- cartography/models/aws/iam/root_principal.py +52 -0
- cartography/models/aws/iam/service_principal.py +30 -0
- cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
- cartography/models/aws/iam/user.py +59 -0
- cartography/models/aws/identitycenter/__init__.py +0 -0
- cartography/models/aws/identitycenter/awsidentitycenter.py +49 -0
- cartography/models/aws/identitycenter/awspermissionset.py +162 -0
- cartography/models/aws/identitycenter/awssogroup.py +70 -0
- cartography/models/aws/identitycenter/awsssouser.py +110 -0
- cartography/models/aws/inspector/findings.py +124 -58
- cartography/models/aws/inspector/packages.py +18 -42
- cartography/models/aws/kms/__init__.py +0 -0
- cartography/models/aws/kms/aliases.py +86 -0
- cartography/models/aws/kms/grants.py +65 -0
- cartography/models/aws/kms/keys.py +88 -0
- cartography/models/aws/lambda_function/__init__.py +0 -0
- cartography/models/aws/lambda_function/alias.py +74 -0
- cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
- cartography/models/aws/lambda_function/lambda_function.py +91 -0
- cartography/models/aws/lambda_function/layer.py +72 -0
- cartography/models/aws/rds/__init__.py +0 -0
- cartography/models/aws/rds/cluster.py +91 -0
- cartography/models/aws/rds/event_subscription.py +146 -0
- cartography/models/aws/rds/instance.py +156 -0
- cartography/models/aws/rds/snapshot.py +108 -0
- cartography/models/aws/rds/subnet_group.py +101 -0
- cartography/models/aws/route53/__init__.py +0 -0
- cartography/models/aws/route53/dnsrecord.py +235 -0
- cartography/models/aws/route53/nameserver.py +63 -0
- cartography/models/aws/route53/subzone.py +40 -0
- cartography/models/aws/route53/zone.py +47 -0
- cartography/models/aws/s3/__init__.py +0 -0
- cartography/models/aws/s3/account_public_access_block.py +51 -0
- cartography/models/aws/s3/notification.py +24 -0
- cartography/models/aws/secretsmanager/__init__.py +0 -0
- cartography/models/aws/secretsmanager/secret.py +106 -0
- cartography/models/aws/secretsmanager/secret_version.py +114 -0
- cartography/models/aws/sns/__init__.py +0 -0
- cartography/models/aws/sns/topic.py +50 -0
- cartography/models/aws/sns/topic_subscription.py +74 -0
- cartography/models/aws/sqs/__init__.py +0 -0
- cartography/models/aws/sqs/queue.py +89 -0
- cartography/models/aws/ssm/instance_information.py +51 -39
- cartography/models/aws/ssm/instance_patch.py +32 -26
- cartography/models/aws/ssm/parameters.py +84 -0
- cartography/models/azure/__init__.py +0 -0
- cartography/models/azure/aks_cluster.py +54 -0
- cartography/models/azure/aks_nodepool.py +54 -0
- cartography/models/azure/app_service.py +59 -0
- cartography/models/azure/container_instance.py +57 -0
- cartography/models/azure/cosmosdb/__init__.py +0 -0
- cartography/models/azure/cosmosdb/account.py +77 -0
- cartography/models/azure/cosmosdb/accountfailoverpolicy.py +77 -0
- cartography/models/azure/cosmosdb/cassandrakeyspace.py +82 -0
- cartography/models/azure/cosmosdb/cassandratable.py +81 -0
- cartography/models/azure/cosmosdb/corspolicy.py +74 -0
- cartography/models/azure/cosmosdb/dblocation.py +120 -0
- cartography/models/azure/cosmosdb/mongodbcollection.py +82 -0
- cartography/models/azure/cosmosdb/mongodbdatabase.py +78 -0
- cartography/models/azure/cosmosdb/privateendpointconnection.py +81 -0
- cartography/models/azure/cosmosdb/sqlcontainer.py +88 -0
- cartography/models/azure/cosmosdb/sqldatabase.py +78 -0
- cartography/models/azure/cosmosdb/tableresource.py +76 -0
- cartography/models/azure/cosmosdb/virtualnetworkrule.py +78 -0
- cartography/models/azure/data_factory/__init__.py +0 -0
- cartography/models/azure/data_factory/data_factory.py +51 -0
- cartography/models/azure/data_factory/data_factory_dataset.py +94 -0
- cartography/models/azure/data_factory/data_factory_linked_service.py +78 -0
- cartography/models/azure/data_factory/data_factory_pipeline.py +93 -0
- cartography/models/azure/data_lake_filesystem.py +51 -0
- cartography/models/azure/event_grid_topic.py +57 -0
- cartography/models/azure/function_app.py +59 -0
- cartography/models/azure/load_balancer/__init__.py +0 -0
- cartography/models/azure/load_balancer/load_balancer.py +49 -0
- cartography/models/azure/load_balancer/load_balancer_backend_pool.py +73 -0
- cartography/models/azure/load_balancer/load_balancer_frontend_ip.py +75 -0
- cartography/models/azure/load_balancer/load_balancer_inbound_nat_rule.py +78 -0
- cartography/models/azure/load_balancer/load_balancer_rule.py +108 -0
- cartography/models/azure/logic_apps.py +56 -0
- cartography/models/azure/monitor.py +54 -0
- cartography/models/azure/network_interface.py +112 -0
- cartography/models/azure/network_security_group.py +50 -0
- cartography/models/azure/permission_relationships.py +60 -0
- cartography/models/azure/principal.py +41 -0
- cartography/models/azure/public_ip_address.py +50 -0
- cartography/models/azure/rbac.py +268 -0
- cartography/models/azure/resource_groups.py +52 -0
- cartography/models/azure/security_center.py +50 -0
- cartography/models/azure/sql/__init__.py +0 -0
- cartography/models/azure/sql/databasethreatdetectionpolicy.py +85 -0
- cartography/models/azure/sql/elasticpool.py +77 -0
- cartography/models/azure/sql/failovergroup.py +73 -0
- cartography/models/azure/sql/recoverabledatabase.py +75 -0
- cartography/models/azure/sql/replicationlink.py +81 -0
- cartography/models/azure/sql/restorabledroppeddatabase.py +82 -0
- cartography/models/azure/sql/restorepoint.py +74 -0
- cartography/models/azure/sql/serveradadministrator.py +74 -0
- cartography/models/azure/sql/serverdnsalias.py +71 -0
- cartography/models/azure/sql/sqldatabase.py +85 -0
- cartography/models/azure/sql/sqlserver.py +50 -0
- cartography/models/azure/sql/transparentdataencryption.py +76 -0
- cartography/models/azure/storage/__init__.py +0 -0
- cartography/models/azure/storage/account.py +59 -0
- cartography/models/azure/storage/blobcontainer.py +85 -0
- cartography/models/azure/storage/blobservice.py +71 -0
- cartography/models/azure/storage/fileservice.py +71 -0
- cartography/models/azure/storage/fileshare.py +82 -0
- cartography/models/azure/storage/queue.py +71 -0
- cartography/models/azure/storage/queueservice.py +73 -0
- cartography/models/azure/storage/table.py +72 -0
- cartography/models/azure/storage/tableservice.py +73 -0
- cartography/models/azure/subnet.py +101 -0
- cartography/models/azure/subscription.py +47 -0
- cartography/models/azure/tags/__init__.py +0 -0
- cartography/models/azure/tags/storage_tag.py +40 -0
- cartography/models/azure/tags/tag.py +37 -0
- cartography/models/azure/tenant.py +17 -0
- cartography/models/azure/virtual_network.py +49 -0
- cartography/models/azure/vm/__init__.py +0 -0
- cartography/models/azure/vm/datadisk.py +80 -0
- cartography/models/azure/vm/disk.py +55 -0
- cartography/models/azure/vm/snapshot.py +56 -0
- cartography/models/azure/vm/virtualmachine.py +59 -0
- cartography/models/bigfix/bigfix_computer.py +42 -38
- cartography/models/bigfix/bigfix_root.py +3 -3
- cartography/models/cloudflare/__init__.py +0 -0
- cartography/models/cloudflare/account.py +25 -0
- cartography/models/cloudflare/dnsrecord.py +55 -0
- cartography/models/cloudflare/member.py +86 -0
- cartography/models/cloudflare/role.py +44 -0
- cartography/models/cloudflare/zone.py +59 -0
- cartography/models/core/common.py +53 -2
- cartography/models/core/nodes.py +20 -4
- cartography/models/core/relationships.py +58 -6
- cartography/models/crowdstrike/__init__.py +0 -0
- cartography/models/crowdstrike/hosts.py +51 -0
- cartography/models/cve/cve.py +34 -32
- cartography/models/cve/cve_feed.py +6 -6
- cartography/models/digitalocean/__init__.py +0 -0
- cartography/models/digitalocean/account.py +21 -0
- cartography/models/digitalocean/droplet.py +58 -0
- cartography/models/digitalocean/project.py +48 -0
- cartography/models/duo/api_host.py +3 -3
- cartography/models/duo/endpoint.py +43 -41
- cartography/models/duo/group.py +14 -14
- cartography/models/duo/phone.py +27 -27
- cartography/models/duo/token.py +16 -16
- cartography/models/duo/user.py +50 -44
- cartography/models/duo/web_authn_credential.py +27 -19
- cartography/models/entra/__init__.py +0 -0
- cartography/models/entra/app_role_assignment.py +115 -0
- cartography/models/entra/application.py +49 -0
- cartography/models/entra/entra_user_to_aws_sso.py +41 -0
- cartography/models/entra/group.py +117 -0
- cartography/models/entra/ou.py +48 -0
- cartography/models/entra/service_principal.py +104 -0
- cartography/models/entra/tenant.py +39 -0
- cartography/models/entra/user.py +90 -0
- cartography/models/gcp/__init__.py +0 -0
- cartography/models/gcp/bigtable/__init__.py +0 -0
- cartography/models/gcp/bigtable/app_profile.py +94 -0
- cartography/models/gcp/bigtable/backup.py +91 -0
- cartography/models/gcp/bigtable/cluster.py +73 -0
- cartography/models/gcp/bigtable/instance.py +52 -0
- cartography/models/gcp/bigtable/table.py +69 -0
- cartography/models/gcp/compute/__init__.py +0 -0
- cartography/models/gcp/compute/subnet.py +74 -0
- cartography/models/gcp/compute/vpc.py +50 -0
- cartography/models/gcp/crm/__init__.py +0 -0
- cartography/models/gcp/crm/folders.py +98 -0
- cartography/models/gcp/crm/organizations.py +21 -0
- cartography/models/gcp/crm/projects.py +100 -0
- cartography/models/gcp/dns.py +109 -0
- cartography/models/gcp/gke.py +69 -0
- cartography/models/gcp/iam.py +73 -0
- cartography/models/gcp/permission_relationships.py +61 -0
- cartography/models/gcp/policy_bindings.py +93 -0
- cartography/models/gcp/storage/__init__.py +0 -0
- cartography/models/gcp/storage/bucket.py +119 -0
- cartography/models/github/commits.py +63 -0
- cartography/models/github/dependencies.py +73 -0
- cartography/models/github/manifests.py +49 -0
- cartography/models/github/orgs.py +27 -0
- cartography/models/github/teams.py +74 -22
- cartography/models/github/users.py +149 -0
- cartography/models/googleworkspace/__init__.py +0 -0
- cartography/models/googleworkspace/device.py +132 -0
- cartography/models/googleworkspace/group.py +382 -0
- cartography/models/googleworkspace/oauth_app.py +124 -0
- cartography/models/googleworkspace/tenant.py +30 -0
- cartography/models/googleworkspace/user.py +113 -0
- cartography/models/gsuite/__init__.py +0 -0
- cartography/models/gsuite/group.py +218 -0
- cartography/models/gsuite/tenant.py +29 -0
- cartography/models/gsuite/user.py +107 -0
- cartography/models/kandji/device.py +22 -17
- cartography/models/kandji/tenant.py +6 -4
- cartography/models/keycloak/__init__.py +0 -0
- cartography/models/keycloak/authenticationexecution.py +160 -0
- cartography/models/keycloak/authenticationflow.py +54 -0
- cartography/models/keycloak/client.py +179 -0
- cartography/models/keycloak/group.py +101 -0
- cartography/models/keycloak/identityprovider.py +89 -0
- cartography/models/keycloak/organization.py +116 -0
- cartography/models/keycloak/organizationdomain.py +73 -0
- cartography/models/keycloak/realm.py +173 -0
- cartography/models/keycloak/role.py +126 -0
- cartography/models/keycloak/scope.py +73 -0
- cartography/models/keycloak/user.py +55 -0
- cartography/models/kubernetes/__init__.py +0 -0
- cartography/models/kubernetes/clusterrolebindings.py +138 -0
- cartography/models/kubernetes/clusterroles.py +52 -0
- cartography/models/kubernetes/clusters.py +26 -0
- cartography/models/kubernetes/containers.py +133 -0
- cartography/models/kubernetes/groups.py +107 -0
- cartography/models/kubernetes/namespaces.py +51 -0
- cartography/models/kubernetes/oidc.py +51 -0
- cartography/models/kubernetes/pods.py +80 -0
- cartography/models/kubernetes/rolebindings.py +159 -0
- cartography/models/kubernetes/roles.py +76 -0
- cartography/models/kubernetes/secrets.py +79 -0
- cartography/models/kubernetes/serviceaccounts.py +77 -0
- cartography/models/kubernetes/services.py +108 -0
- cartography/models/kubernetes/users.py +105 -0
- cartography/models/lastpass/tenant.py +3 -3
- cartography/models/lastpass/user.py +36 -28
- cartography/models/ontology/__init__.py +0 -0
- cartography/models/ontology/device.py +137 -0
- cartography/models/ontology/mapping/__init__.py +76 -0
- cartography/models/ontology/mapping/data/__init__.py +0 -0
- cartography/models/ontology/mapping/data/apikeys.py +93 -0
- cartography/models/ontology/mapping/data/computeinstance.py +95 -0
- cartography/models/ontology/mapping/data/containers.py +88 -0
- cartography/models/ontology/mapping/data/databases.py +182 -0
- cartography/models/ontology/mapping/data/devices.py +194 -0
- cartography/models/ontology/mapping/data/thirdpartyapps.py +140 -0
- cartography/models/ontology/mapping/data/useraccounts.py +416 -0
- cartography/models/ontology/mapping/data/users.py +63 -0
- cartography/models/ontology/mapping/specs.py +85 -0
- cartography/models/ontology/user.py +51 -0
- cartography/models/openai/__init__.py +0 -0
- cartography/models/openai/adminapikey.py +94 -0
- cartography/models/openai/apikey.py +88 -0
- cartography/models/openai/organization.py +17 -0
- cartography/models/openai/project.py +89 -0
- cartography/models/openai/serviceaccount.py +50 -0
- cartography/models/openai/user.py +53 -0
- cartography/models/scaleway/__init__.py +0 -0
- cartography/models/scaleway/iam/__init__.py +0 -0
- cartography/models/scaleway/iam/apikey.py +100 -0
- cartography/models/scaleway/iam/application.py +52 -0
- cartography/models/scaleway/iam/group.py +95 -0
- cartography/models/scaleway/iam/user.py +64 -0
- cartography/models/scaleway/instance/__init__.py +0 -0
- cartography/models/scaleway/instance/flexibleip.py +52 -0
- cartography/models/scaleway/instance/instance.py +120 -0
- cartography/models/scaleway/organization.py +19 -0
- cartography/models/scaleway/project.py +48 -0
- cartography/models/scaleway/storage/__init__.py +0 -0
- cartography/models/scaleway/storage/snapshot.py +78 -0
- cartography/models/scaleway/storage/volume.py +51 -0
- cartography/models/semgrep/dependencies.py +102 -0
- cartography/models/semgrep/deployment.py +5 -5
- cartography/models/semgrep/findings.py +58 -40
- cartography/models/semgrep/locations.py +27 -21
- cartography/models/sentinelone/__init__.py +1 -0
- cartography/models/sentinelone/account.py +40 -0
- cartography/models/sentinelone/agent.py +50 -0
- cartography/models/sentinelone/application.py +44 -0
- cartography/models/sentinelone/application_version.py +96 -0
- cartography/models/sentinelone/cve.py +73 -0
- cartography/models/slack/__init__.py +0 -0
- cartography/models/slack/channels.py +92 -0
- cartography/models/slack/group.py +129 -0
- cartography/models/slack/team.py +22 -0
- cartography/models/slack/user.py +62 -0
- cartography/models/snipeit/__init__.py +0 -0
- cartography/models/snipeit/asset.py +92 -0
- cartography/models/snipeit/tenant.py +19 -0
- cartography/models/snipeit/user.py +60 -0
- cartography/models/spacelift/__init__.py +0 -0
- cartography/models/spacelift/cloudtrailevent.py +120 -0
- cartography/models/spacelift/run.py +162 -0
- cartography/models/spacelift/space.py +131 -0
- cartography/models/spacelift/spaceliftaccount.py +31 -0
- cartography/models/spacelift/spaceliftgitcommit.py +157 -0
- cartography/models/spacelift/stack.py +96 -0
- cartography/models/spacelift/user.py +63 -0
- cartography/models/spacelift/worker.py +97 -0
- cartography/models/spacelift/workerpool.py +90 -0
- cartography/models/tailscale/__init__.py +0 -0
- cartography/models/tailscale/device.py +96 -0
- cartography/models/tailscale/group.py +86 -0
- cartography/models/tailscale/postureintegration.py +58 -0
- cartography/models/tailscale/tag.py +102 -0
- cartography/models/tailscale/tailnet.py +29 -0
- cartography/models/tailscale/user.py +57 -0
- cartography/models/trivy/__init__.py +0 -0
- cartography/models/trivy/findings.py +66 -0
- cartography/models/trivy/fix.py +66 -0
- cartography/models/trivy/package.py +71 -0
- cartography/rules/README.md +1 -0
- cartography/rules/__init__.py +0 -0
- cartography/rules/cli.py +261 -0
- cartography/rules/data/__init__.py +0 -0
- cartography/rules/data/rules/__init__.py +46 -0
- cartography/rules/data/rules/cloud_security_product_deactivated.py +49 -0
- cartography/rules/data/rules/compute_instance_exposed.py +51 -0
- cartography/rules/data/rules/database_instance_exposed.py +53 -0
- cartography/rules/data/rules/delegation_boundary_modifiable.py +90 -0
- cartography/rules/data/rules/identity_administration_privileges.py +100 -0
- cartography/rules/data/rules/inactive_user_active_accounts.py +48 -0
- cartography/rules/data/rules/malicious_npm_dependencies_shai_hulud.py +2222 -0
- cartography/rules/data/rules/mfa_missing.py +46 -0
- cartography/rules/data/rules/object_storage_public.py +100 -0
- cartography/rules/data/rules/policy_administration_privileges.py +104 -0
- cartography/rules/data/rules/unmanaged_accounts.py +43 -0
- cartography/rules/data/rules/workload_identity_admin_capabilities.py +193 -0
- cartography/rules/formatters.py +108 -0
- cartography/rules/runners.py +216 -0
- cartography/rules/spec/__init__.py +0 -0
- cartography/rules/spec/model.py +267 -0
- cartography/rules/spec/result.py +38 -0
- cartography/stats.py +4 -4
- cartography/sync.py +137 -31
- cartography/util.py +187 -77
- cartography-0.123.0.dist-info/METADATA +230 -0
- cartography-0.123.0.dist-info/RECORD +856 -0
- {cartography-0.93.0rc1.dist-info → cartography-0.123.0.dist-info}/WHEEL +1 -1
- {cartography-0.93.0rc1.dist-info → cartography-0.123.0.dist-info}/entry_points.txt +1 -0
- {cartography-0.93.0rc1.dist-info → cartography-0.123.0.dist-info/licenses}/LICENSE +1 -1
- cartography/data/jobs/analysis/aws_ec2_iaminstance.json +0 -10
- cartography/data/jobs/analysis/aws_ec2_iaminstanceprofile.json +0 -10
- cartography/data/jobs/cleanup/aws_apigateway_details.json +0 -10
- cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
- cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
- cartography/data/jobs/cleanup/aws_import_apigateway_cleanup.json +0 -45
- cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
- cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
- cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
- cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
- cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
- cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
- cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
- cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
- cartography/data/jobs/cleanup/aws_import_vpc_peering_cleanup.json +0 -45
- cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
- cartography/data/jobs/cleanup/azure_cosmosdb_cassandra_keyspace_cleanup.json +0 -25
- cartography/data/jobs/cleanup/azure_cosmosdb_cors_details.json +0 -15
- cartography/data/jobs/cleanup/azure_cosmosdb_mongodb_database_cleanup.json +0 -25
- cartography/data/jobs/cleanup/azure_cosmosdb_sql_database_cleanup.json +0 -25
- cartography/data/jobs/cleanup/azure_cosmosdb_table_resources_cleanup.json +0 -15
- cartography/data/jobs/cleanup/azure_database_account_cleanup.json +0 -85
- cartography/data/jobs/cleanup/azure_import_disks_cleanup.json +0 -15
- cartography/data/jobs/cleanup/azure_import_snapshots_cleanup.json +0 -15
- cartography/data/jobs/cleanup/azure_import_virtual_machines_cleanup.json +0 -25
- cartography/data/jobs/cleanup/azure_sql_server_cleanup.json +0 -125
- cartography/data/jobs/cleanup/azure_storage_account_cleanup.json +0 -95
- cartography/data/jobs/cleanup/azure_subscriptions_cleanup.json +0 -14
- cartography/data/jobs/cleanup/azure_tenant_cleanup.json +0 -9
- cartography/data/jobs/cleanup/crxcavator_import_cleanup.json +0 -18
- cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
- cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
- cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
- cartography/data/jobs/cleanup/github_users_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
- cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
- cartography/intel/crxcavator/__init__.py +0 -44
- cartography/intel/crxcavator/crxcavator.py +0 -329
- cartography/intel/gcp/crm.py +0 -302
- cartography/intel/gsuite/api.py +0 -284
- cartography/models/aws/ec2/keypairs.py +0 -64
- cartography-0.93.0rc1.dist-info/METADATA +0 -55
- cartography-0.93.0rc1.dist-info/NOTICE +0 -4
- cartography-0.93.0rc1.dist-info/RECORD +0 -341
- /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
- {cartography-0.93.0rc1.dist-info → cartography-0.123.0.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import configparser
|
|
2
2
|
import logging
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from collections import namedtuple
|
|
3
5
|
from string import Template
|
|
4
6
|
from typing import Any
|
|
7
|
+
from typing import cast
|
|
5
8
|
from typing import Dict
|
|
6
9
|
from typing import List
|
|
7
10
|
from typing import Optional
|
|
@@ -11,19 +14,41 @@ from packaging.requirements import InvalidRequirement
|
|
|
11
14
|
from packaging.requirements import Requirement
|
|
12
15
|
from packaging.utils import canonicalize_name
|
|
13
16
|
|
|
17
|
+
from cartography.client.core.tx import execute_write_with_retry
|
|
18
|
+
from cartography.client.core.tx import load as load_data
|
|
19
|
+
from cartography.graph.job import GraphJob
|
|
14
20
|
from cartography.intel.github.util import fetch_all
|
|
21
|
+
from cartography.intel.github.util import PaginatedGraphqlData
|
|
22
|
+
from cartography.models.github.dependencies import GitHubDependencySchema
|
|
23
|
+
from cartography.models.github.manifests import DependencyGraphManifestSchema
|
|
24
|
+
from cartography.util import backoff_handler
|
|
25
|
+
from cartography.util import retries_with_backoff
|
|
15
26
|
from cartography.util import run_cleanup_job
|
|
16
27
|
from cartography.util import timeit
|
|
17
28
|
|
|
18
29
|
logger = logging.getLogger(__name__)
|
|
19
30
|
|
|
31
|
+
|
|
32
|
+
# Representation of a user's permission level and affiliation to a GitHub repo. See:
|
|
33
|
+
# - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
34
|
+
# - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
|
|
35
|
+
UserAffiliationAndRepoPermission = namedtuple(
|
|
36
|
+
"UserAffiliationAndRepoPermission",
|
|
37
|
+
[
|
|
38
|
+
"user", # Dict
|
|
39
|
+
"permission", # 'WRITE', 'MAINTAIN', 'ADMIN', etc
|
|
40
|
+
"affiliation", # 'OUTSIDE', 'DIRECT'
|
|
41
|
+
],
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
20
45
|
GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
21
|
-
query($login: String!, $cursor: String) {
|
|
46
|
+
query($login: String!, $cursor: String, $count: Int!) {
|
|
22
47
|
organization(login: $login)
|
|
23
48
|
{
|
|
24
49
|
url
|
|
25
50
|
login
|
|
26
|
-
repositories(first:
|
|
51
|
+
repositories(first: $count, after: $cursor){
|
|
27
52
|
pageInfo{
|
|
28
53
|
endCursor
|
|
29
54
|
hasNextPage
|
|
@@ -59,17 +84,11 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
59
84
|
login
|
|
60
85
|
__typename
|
|
61
86
|
}
|
|
62
|
-
collaborators(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
url
|
|
68
|
-
login
|
|
69
|
-
name
|
|
70
|
-
email
|
|
71
|
-
company
|
|
72
|
-
}
|
|
87
|
+
directCollaborators: collaborators(first: 100, affiliation: DIRECT) {
|
|
88
|
+
totalCount
|
|
89
|
+
}
|
|
90
|
+
outsideCollaborators: collaborators(first: 100, affiliation: OUTSIDE) {
|
|
91
|
+
totalCount
|
|
73
92
|
}
|
|
74
93
|
requirements:object(expression: "HEAD:requirements.txt") {
|
|
75
94
|
... on Blob {
|
|
@@ -81,6 +100,18 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
81
100
|
text
|
|
82
101
|
}
|
|
83
102
|
}
|
|
103
|
+
dependencyGraphManifests(first: 20) {
|
|
104
|
+
nodes {
|
|
105
|
+
blobPath
|
|
106
|
+
dependencies(first: 100) {
|
|
107
|
+
nodes {
|
|
108
|
+
packageName
|
|
109
|
+
requirements
|
|
110
|
+
packageManager
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
84
115
|
}
|
|
85
116
|
}
|
|
86
117
|
}
|
|
@@ -89,9 +120,175 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
89
120
|
# Note: In the above query, `HEAD` references the default branch.
|
|
90
121
|
# See https://stackoverflow.com/questions/48935381/github-graphql-api-default-branch-in-repository
|
|
91
122
|
|
|
123
|
+
GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """
|
|
124
|
+
query($login: String!, $repo: String!, $affiliation: CollaboratorAffiliation!, $cursor: String) {
|
|
125
|
+
organization(login: $login) {
|
|
126
|
+
url
|
|
127
|
+
login
|
|
128
|
+
repository(name: $repo){
|
|
129
|
+
name
|
|
130
|
+
collaborators(first: 50, affiliation: $affiliation, after: $cursor) {
|
|
131
|
+
edges {
|
|
132
|
+
permission
|
|
133
|
+
}
|
|
134
|
+
nodes {
|
|
135
|
+
url
|
|
136
|
+
login
|
|
137
|
+
name
|
|
138
|
+
email
|
|
139
|
+
company
|
|
140
|
+
}
|
|
141
|
+
pageInfo{
|
|
142
|
+
endCursor
|
|
143
|
+
hasNextPage
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
rateLimit {
|
|
149
|
+
limit
|
|
150
|
+
cost
|
|
151
|
+
remaining
|
|
152
|
+
resetAt
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
"""
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _get_repo_collaborators_inner_func(
|
|
159
|
+
org: str,
|
|
160
|
+
api_url: str,
|
|
161
|
+
token: str,
|
|
162
|
+
repo_raw_data: list[dict[str, Any] | None],
|
|
163
|
+
affiliation: str,
|
|
164
|
+
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
|
|
165
|
+
result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
166
|
+
|
|
167
|
+
for repo in repo_raw_data:
|
|
168
|
+
# GitHub can return null repo entries. See issues #1334 and #1404.
|
|
169
|
+
if repo is None:
|
|
170
|
+
logger.info(
|
|
171
|
+
"Skipping null repository entry while fetching %s collaborators.",
|
|
172
|
+
affiliation,
|
|
173
|
+
)
|
|
174
|
+
continue
|
|
175
|
+
repo_name = repo["name"]
|
|
176
|
+
repo_url = repo["url"]
|
|
177
|
+
|
|
178
|
+
# Guard against None when collaborator fields are not accessible due to permissions.
|
|
179
|
+
direct_info = repo.get("directCollaborators")
|
|
180
|
+
outside_info = repo.get("outsideCollaborators")
|
|
181
|
+
|
|
182
|
+
if affiliation == "OUTSIDE":
|
|
183
|
+
total_outside = 0 if not outside_info else outside_info.get("totalCount", 0)
|
|
184
|
+
if total_outside == 0:
|
|
185
|
+
# No outside collaborators or not permitted to view; skip API calls for this repo.
|
|
186
|
+
result[repo_url] = []
|
|
187
|
+
continue
|
|
188
|
+
else: # DIRECT
|
|
189
|
+
total_direct = 0 if not direct_info else direct_info.get("totalCount", 0)
|
|
190
|
+
if total_direct == 0:
|
|
191
|
+
# No direct collaborators or not permitted to view; skip API calls for this repo.
|
|
192
|
+
result[repo_url] = []
|
|
193
|
+
continue
|
|
194
|
+
|
|
195
|
+
logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
|
|
196
|
+
collaborators = _get_repo_collaborators(
|
|
197
|
+
token,
|
|
198
|
+
api_url,
|
|
199
|
+
org,
|
|
200
|
+
repo_name,
|
|
201
|
+
affiliation,
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
collab_users: List[dict[str, Any]] = []
|
|
205
|
+
collab_permission: List[str] = []
|
|
206
|
+
|
|
207
|
+
# nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
|
|
208
|
+
# However, GitHub sometimes returns None for them anyway; see issues #1334 and #1404.
|
|
209
|
+
for collab in collaborators.nodes or []:
|
|
210
|
+
collab_users.append(collab)
|
|
211
|
+
|
|
212
|
+
# The `or []` is because `.edges` can be None.
|
|
213
|
+
for perm in collaborators.edges or []:
|
|
214
|
+
collab_permission.append(perm["permission"])
|
|
215
|
+
|
|
216
|
+
result[repo_url] = [
|
|
217
|
+
UserAffiliationAndRepoPermission(user, permission, affiliation)
|
|
218
|
+
for user, permission in zip(collab_users, collab_permission)
|
|
219
|
+
]
|
|
220
|
+
return result
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _get_repo_collaborators_for_multiple_repos(
|
|
224
|
+
repo_raw_data: list[dict[str, Any] | None],
|
|
225
|
+
affiliation: str,
|
|
226
|
+
org: str,
|
|
227
|
+
api_url: str,
|
|
228
|
+
token: str,
|
|
229
|
+
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
|
|
230
|
+
"""
|
|
231
|
+
For every repo in the given list, retrieve the collaborators.
|
|
232
|
+
:param repo_raw_data: A list of dicts representing repos. See tests.data.github.repos.GET_REPOS for data shape.
|
|
233
|
+
:param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
|
|
234
|
+
See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
|
|
235
|
+
:param org: The name of the target Github organization as string.
|
|
236
|
+
:param api_url: The Github v4 API endpoint as string.
|
|
237
|
+
:param token: The Github API token as string.
|
|
238
|
+
:return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
|
|
239
|
+
"""
|
|
240
|
+
logger.info(
|
|
241
|
+
f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".',
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
result: dict[str, list[UserAffiliationAndRepoPermission]] = retries_with_backoff(
|
|
245
|
+
_get_repo_collaborators_inner_func,
|
|
246
|
+
TypeError,
|
|
247
|
+
5,
|
|
248
|
+
backoff_handler,
|
|
249
|
+
)(
|
|
250
|
+
org=org,
|
|
251
|
+
api_url=api_url,
|
|
252
|
+
token=token,
|
|
253
|
+
repo_raw_data=repo_raw_data,
|
|
254
|
+
affiliation=affiliation,
|
|
255
|
+
)
|
|
256
|
+
return result
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _get_repo_collaborators(
|
|
260
|
+
token: str,
|
|
261
|
+
api_url: str,
|
|
262
|
+
organization: str,
|
|
263
|
+
repo: str,
|
|
264
|
+
affiliation: str,
|
|
265
|
+
) -> PaginatedGraphqlData:
|
|
266
|
+
"""
|
|
267
|
+
Retrieve a list of collaborators for a given repository, as described in
|
|
268
|
+
https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection.
|
|
269
|
+
:param token: The Github API token as string.
|
|
270
|
+
:param api_url: The Github v4 API endpoint as string.
|
|
271
|
+
:param organization: The name of the target Github organization as string.
|
|
272
|
+
:param repo: The name of the target Github repository as string.
|
|
273
|
+
:param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
|
|
274
|
+
See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
|
|
275
|
+
:return: A PaginatedGraphqlData object whose `nodes` are the collaborator users and whose `edges` carry each collaborator's permission on the repo.
|
|
276
|
+
"""
|
|
277
|
+
collaborators, _ = fetch_all(
|
|
278
|
+
token,
|
|
279
|
+
api_url,
|
|
280
|
+
organization,
|
|
281
|
+
GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL,
|
|
282
|
+
"repository",
|
|
283
|
+
resource_inner_type="collaborators",
|
|
284
|
+
repo=repo,
|
|
285
|
+
affiliation=affiliation,
|
|
286
|
+
)
|
|
287
|
+
return collaborators
|
|
288
|
+
|
|
92
289
|
|
|
93
290
|
@timeit
|
|
94
|
-
def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
291
|
+
def get(token: str, api_url: str, organization: str) -> List[Optional[Dict]]:
|
|
95
292
|
"""
|
|
96
293
|
Retrieve a list of repos from a Github organization as described in
|
|
97
294
|
https://docs.github.com/en/graphql/reference/objects#repository.
|
|
@@ -99,6 +296,8 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
99
296
|
:param api_url: The Github v4 API endpoint as string.
|
|
100
297
|
:param organization: The name of the target Github organization as string.
|
|
101
298
|
:return: A list of dicts representing repos. See tests.data.github.repos for data shape.
|
|
299
|
+
Note: The list may contain None entries per GraphQL spec when resolvers error
|
|
300
|
+
(permissions, rate limits, transient issues). See issues #1334 and #1404.
|
|
102
301
|
"""
|
|
103
302
|
# TODO: link the Github organization to the repositories
|
|
104
303
|
repos, _ = fetch_all(
|
|
@@ -106,41 +305,118 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
106
305
|
api_url,
|
|
107
306
|
organization,
|
|
108
307
|
GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
|
|
109
|
-
|
|
308
|
+
"repositories",
|
|
309
|
+
count=50,
|
|
110
310
|
)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
311
|
+
# Cast is needed because GitHub's GraphQL RepositoryConnection.nodes is typed [Repository] (not [Repository!])
|
|
312
|
+
# per GraphQL spec, allowing null entries when resolvers error (permissions, rate limits, transient issues).
|
|
313
|
+
# See https://github.com/cartography-cncf/cartography/issues/1334
|
|
314
|
+
# and https://github.com/cartography-cncf/cartography/issues/1404
|
|
315
|
+
return cast(List[Optional[Dict]], repos.nodes)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def transform(
|
|
319
|
+
repos_json: List[Optional[Dict]],
|
|
320
|
+
direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
321
|
+
outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
322
|
+
) -> Dict:
|
|
115
323
|
"""
|
|
116
324
|
Parses the JSON returned from GitHub API to create data for graph ingestion
|
|
117
|
-
:param repos_json: the list of individual repository nodes from GitHub.
|
|
118
|
-
|
|
325
|
+
:param repos_json: the list of individual repository nodes from GitHub.
|
|
326
|
+
See tests.data.github.repos.GET_REPOS for data shape.
|
|
327
|
+
:param direct_collaborators: dict of repo URL to list of direct collaborators.
|
|
328
|
+
See tests.data.github.repos.DIRECT_COLLABORATORS for data shape.
|
|
329
|
+
:param outside_collaborators: dict of repo URL to list of outside collaborators.
|
|
330
|
+
See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
|
|
119
331
|
:return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo
|
|
120
|
-
mapping,
|
|
332
|
+
mapping, direct collaborators->repo mapping, Python requirements files (if any) in a repo, manifests from GitHub's dependency graph, and all
|
|
333
|
+
dependencies from GitHub's dependency graph.
|
|
121
334
|
"""
|
|
335
|
+
logger.info(f"Processing {len(repos_json)} GitHub repositories")
|
|
122
336
|
transformed_repo_list: List[Dict] = []
|
|
123
337
|
transformed_repo_languages: List[Dict] = []
|
|
124
338
|
transformed_repo_owners: List[Dict] = []
|
|
125
339
|
# See https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
126
|
-
|
|
127
|
-
|
|
340
|
+
transformed_outside_collaborators: Dict[str, List[Any]] = {
|
|
341
|
+
"ADMIN": [],
|
|
342
|
+
"MAINTAIN": [],
|
|
343
|
+
"READ": [],
|
|
344
|
+
"TRIAGE": [],
|
|
345
|
+
"WRITE": [],
|
|
346
|
+
}
|
|
347
|
+
transformed_direct_collaborators: Dict[str, List[Any]] = {
|
|
348
|
+
"ADMIN": [],
|
|
349
|
+
"MAINTAIN": [],
|
|
350
|
+
"READ": [],
|
|
351
|
+
"TRIAGE": [],
|
|
352
|
+
"WRITE": [],
|
|
128
353
|
}
|
|
129
354
|
transformed_requirements_files: List[Dict] = []
|
|
355
|
+
transformed_dependencies: List[Dict] = []
|
|
356
|
+
transformed_manifests: List[Dict] = []
|
|
130
357
|
for repo_object in repos_json:
|
|
131
|
-
|
|
358
|
+
# GitHub can return null repo entries. See issues #1334 and #1404.
|
|
359
|
+
if repo_object is None:
|
|
360
|
+
logger.debug("Skipping null repository entry during transformation.")
|
|
361
|
+
continue
|
|
362
|
+
_transform_repo_languages(
|
|
363
|
+
repo_object["url"],
|
|
364
|
+
repo_object,
|
|
365
|
+
transformed_repo_languages,
|
|
366
|
+
)
|
|
132
367
|
_transform_repo_objects(repo_object, transformed_repo_list)
|
|
133
|
-
_transform_repo_owners(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
368
|
+
_transform_repo_owners(
|
|
369
|
+
repo_object["owner"]["url"],
|
|
370
|
+
repo_object,
|
|
371
|
+
transformed_repo_owners,
|
|
372
|
+
)
|
|
373
|
+
|
|
374
|
+
# Allow sync to continue if we didn't have permissions to list collaborators
|
|
375
|
+
repo_url = repo_object["url"]
|
|
376
|
+
if repo_url in outside_collaborators:
|
|
377
|
+
_transform_collaborators(
|
|
378
|
+
repo_object["url"],
|
|
379
|
+
outside_collaborators[repo_object["url"]],
|
|
380
|
+
transformed_outside_collaborators,
|
|
381
|
+
)
|
|
382
|
+
if repo_url in direct_collaborators:
|
|
383
|
+
_transform_collaborators(
|
|
384
|
+
repo_object["url"],
|
|
385
|
+
direct_collaborators[repo_object["url"]],
|
|
386
|
+
transformed_direct_collaborators,
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
_transform_requirements_txt(
|
|
390
|
+
repo_object["requirements"],
|
|
391
|
+
repo_url,
|
|
392
|
+
transformed_requirements_files,
|
|
393
|
+
)
|
|
394
|
+
_transform_setup_cfg_requirements(
|
|
395
|
+
repo_object["setupCfg"],
|
|
396
|
+
repo_url,
|
|
397
|
+
transformed_requirements_files,
|
|
398
|
+
)
|
|
399
|
+
_transform_dependency_manifests(
|
|
400
|
+
repo_object.get("dependencyGraphManifests"),
|
|
401
|
+
repo_url,
|
|
402
|
+
transformed_manifests,
|
|
403
|
+
)
|
|
404
|
+
_transform_dependency_graph(
|
|
405
|
+
repo_object.get("dependencyGraphManifests"),
|
|
406
|
+
repo_url,
|
|
407
|
+
transformed_dependencies,
|
|
408
|
+
)
|
|
137
409
|
results = {
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
410
|
+
"repos": transformed_repo_list,
|
|
411
|
+
"repo_languages": transformed_repo_languages,
|
|
412
|
+
"repo_owners": transformed_repo_owners,
|
|
413
|
+
"repo_outside_collaborators": transformed_outside_collaborators,
|
|
414
|
+
"repo_direct_collaborators": transformed_direct_collaborators,
|
|
415
|
+
"python_requirements": transformed_requirements_files,
|
|
416
|
+
"dependencies": transformed_dependencies,
|
|
417
|
+
"manifests": transformed_manifests,
|
|
143
418
|
}
|
|
419
|
+
|
|
144
420
|
return results
|
|
145
421
|
|
|
146
422
|
|
|
@@ -154,9 +430,16 @@ def _create_default_branch_id(repo_url: str, default_branch_ref_id: str) -> str:
|
|
|
154
430
|
|
|
155
431
|
def _create_git_url_from_ssh_url(ssh_url: str) -> str:
|
|
156
432
|
"""
|
|
157
|
-
|
|
433
|
+
Convert SSH URL to git:// URL.
|
|
434
|
+
Example:
|
|
435
|
+
git@github.com:cartography-cncf/cartography.git
|
|
436
|
+
-> git://github.com/cartography-cncf/cartography.git
|
|
158
437
|
"""
|
|
159
|
-
|
|
438
|
+
# Remove the user part (e.g., "git@")
|
|
439
|
+
_, host_and_path = ssh_url.split("@", 1)
|
|
440
|
+
# Replace first ':' (separating host and repo) with '/'
|
|
441
|
+
host, path = host_and_path.split(":", 1)
|
|
442
|
+
return f"git://{host}/{path}"
|
|
160
443
|
|
|
161
444
|
|
|
162
445
|
def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict]) -> None:
|
|
@@ -168,33 +451,37 @@ def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict])
|
|
|
168
451
|
:return: Nothing
|
|
169
452
|
"""
|
|
170
453
|
# Create a unique ID for a GitHubBranch node representing the default branch of this repo object.
|
|
171
|
-
dbr = input_repo_object[
|
|
172
|
-
default_branch_name = dbr[
|
|
173
|
-
default_branch_id =
|
|
454
|
+
dbr = input_repo_object["defaultBranchRef"]
|
|
455
|
+
default_branch_name = dbr["name"] if dbr else None
|
|
456
|
+
default_branch_id = (
|
|
457
|
+
_create_default_branch_id(input_repo_object["url"], dbr["id"]) if dbr else None
|
|
458
|
+
)
|
|
174
459
|
|
|
175
460
|
# Create a git:// URL from the given SSH URL, if it exists.
|
|
176
|
-
ssh_url = input_repo_object.get(
|
|
461
|
+
ssh_url = input_repo_object.get("sshUrl")
|
|
177
462
|
git_url = _create_git_url_from_ssh_url(ssh_url) if ssh_url else None
|
|
178
463
|
|
|
179
|
-
out_repo_list.append(
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
464
|
+
out_repo_list.append(
|
|
465
|
+
{
|
|
466
|
+
"id": input_repo_object["url"],
|
|
467
|
+
"createdat": input_repo_object["createdAt"],
|
|
468
|
+
"name": input_repo_object["name"],
|
|
469
|
+
"fullname": input_repo_object["nameWithOwner"],
|
|
470
|
+
"description": input_repo_object["description"],
|
|
471
|
+
"primarylanguage": input_repo_object["primaryLanguage"],
|
|
472
|
+
"homepage": input_repo_object["homepageUrl"],
|
|
473
|
+
"defaultbranch": default_branch_name,
|
|
474
|
+
"defaultbranchid": default_branch_id,
|
|
475
|
+
"private": input_repo_object["isPrivate"],
|
|
476
|
+
"disabled": input_repo_object["isDisabled"],
|
|
477
|
+
"archived": input_repo_object["isArchived"],
|
|
478
|
+
"locked": input_repo_object["isLocked"],
|
|
479
|
+
"giturl": git_url,
|
|
480
|
+
"url": input_repo_object["url"],
|
|
481
|
+
"sshurl": ssh_url,
|
|
482
|
+
"updatedat": input_repo_object["updatedAt"],
|
|
483
|
+
},
|
|
484
|
+
)
|
|
198
485
|
|
|
199
486
|
|
|
200
487
|
def _transform_repo_owners(owner_id: str, repo: Dict, repo_owners: List[Dict]) -> None:
|
|
@@ -205,15 +492,21 @@ def _transform_repo_owners(owner_id: str, repo: Dict, repo_owners: List[Dict]) -
|
|
|
205
492
|
:param repo_owners: Output array to append transformed results to.
|
|
206
493
|
:return: Nothing.
|
|
207
494
|
"""
|
|
208
|
-
repo_owners.append(
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
495
|
+
repo_owners.append(
|
|
496
|
+
{
|
|
497
|
+
"repo_id": repo["url"],
|
|
498
|
+
"owner": repo["owner"]["login"],
|
|
499
|
+
"owner_id": owner_id,
|
|
500
|
+
"type": repo["owner"]["__typename"],
|
|
501
|
+
},
|
|
502
|
+
)
|
|
214
503
|
|
|
215
504
|
|
|
216
|
-
def _transform_repo_languages(
|
|
505
|
+
def _transform_repo_languages(
|
|
506
|
+
repo_url: str,
|
|
507
|
+
repo: Dict,
|
|
508
|
+
repo_languages: List[Dict],
|
|
509
|
+
) -> None:
|
|
217
510
|
"""
|
|
218
511
|
Helper function to transform the languages in a GitHub repo.
|
|
219
512
|
:param repo_url: The URL of the repo.
|
|
@@ -221,19 +514,27 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di
|
|
|
221
514
|
:param repo_languages: Output array to append transformed results to.
|
|
222
515
|
:return: Nothing.
|
|
223
516
|
"""
|
|
224
|
-
if repo[
|
|
225
|
-
for language in repo[
|
|
226
|
-
repo_languages.append(
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
517
|
+
if repo["languages"]["totalCount"] > 0:
|
|
518
|
+
for language in repo["languages"]["nodes"]:
|
|
519
|
+
repo_languages.append(
|
|
520
|
+
{
|
|
521
|
+
"repo_id": repo_url,
|
|
522
|
+
"language_name": language["name"],
|
|
523
|
+
},
|
|
524
|
+
)
|
|
230
525
|
|
|
231
526
|
|
|
232
|
-
def _transform_collaborators(
|
|
527
|
+
def _transform_collaborators(
|
|
528
|
+
repo_url: str,
|
|
529
|
+
collaborators: List[UserAffiliationAndRepoPermission],
|
|
530
|
+
transformed_collaborators: Dict,
|
|
531
|
+
) -> None:
|
|
233
532
|
"""
|
|
234
|
-
Performs data adjustments for
|
|
533
|
+
Performs data adjustments for collaborators in a GitHub repo.
|
|
235
534
|
Output data shape = [{permission, repo_url, url (the user's URL), login, name}, ...]
|
|
236
|
-
:param collaborators:
|
|
535
|
+
:param collaborators: For data shape, see
|
|
536
|
+
cartography.tests.data.github.repos.DIRECT_COLLABORATORS
|
|
537
|
+
cartography.tests.data.github.repos.OUTSIDE_COLLABORATORS
|
|
237
538
|
:param repo_url: The URL of the GitHub repo.
|
|
238
539
|
:param transformed_collaborators: Output dict. Data shape =
|
|
239
540
|
{'ADMIN': [{ user }, ...], 'MAINTAIN': [{ user }, ...], 'READ': [ ... ], 'TRIAGE': [ ... ], 'WRITE': [ ... ]}
|
|
@@ -241,10 +542,11 @@ def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_col
|
|
|
241
542
|
"""
|
|
242
543
|
# `collaborators` is sometimes None
|
|
243
544
|
if collaborators:
|
|
244
|
-
for
|
|
245
|
-
|
|
246
|
-
user[
|
|
247
|
-
|
|
545
|
+
for collaborator in collaborators:
|
|
546
|
+
user = collaborator.user
|
|
547
|
+
user["repo_url"] = repo_url
|
|
548
|
+
user["affiliation"] = collaborator.affiliation
|
|
549
|
+
transformed_collaborators[collaborator.permission].append(user)
|
|
248
550
|
|
|
249
551
|
|
|
250
552
|
def _transform_requirements_txt(
|
|
@@ -259,10 +561,14 @@ def _transform_requirements_txt(
|
|
|
259
561
|
:param out_requirements_files: Output array to append transformed results to.
|
|
260
562
|
:return: Nothing.
|
|
261
563
|
"""
|
|
262
|
-
if req_file_contents and req_file_contents.get(
|
|
263
|
-
text_contents = req_file_contents[
|
|
564
|
+
if req_file_contents and req_file_contents.get("text"):
|
|
565
|
+
text_contents = req_file_contents["text"]
|
|
264
566
|
requirements_list = text_contents.split("\n")
|
|
265
|
-
_transform_python_requirements(
|
|
567
|
+
_transform_python_requirements(
|
|
568
|
+
requirements_list,
|
|
569
|
+
repo_url,
|
|
570
|
+
out_requirements_files,
|
|
571
|
+
)
|
|
266
572
|
|
|
267
573
|
|
|
268
574
|
def _transform_setup_cfg_requirements(
|
|
@@ -277,9 +583,9 @@ def _transform_setup_cfg_requirements(
|
|
|
277
583
|
:param out_requirements_files: Output array to append transformed results to.
|
|
278
584
|
:return: Nothing.
|
|
279
585
|
"""
|
|
280
|
-
if not setup_cfg_contents or not setup_cfg_contents.get(
|
|
586
|
+
if not setup_cfg_contents or not setup_cfg_contents.get("text"):
|
|
281
587
|
return
|
|
282
|
-
text_contents = setup_cfg_contents[
|
|
588
|
+
text_contents = setup_cfg_contents["text"]
|
|
283
589
|
setup_cfg = configparser.ConfigParser()
|
|
284
590
|
try:
|
|
285
591
|
setup_cfg.read_string(text_contents)
|
|
@@ -293,6 +599,153 @@ def _transform_setup_cfg_requirements(
|
|
|
293
599
|
_transform_python_requirements(requirements_list, repo_url, out_requirements_files)
|
|
294
600
|
|
|
295
601
|
|
|
602
|
+
def _transform_dependency_manifests(
|
|
603
|
+
dependency_manifests: Optional[Dict],
|
|
604
|
+
repo_url: str,
|
|
605
|
+
out_manifests_list: List[Dict],
|
|
606
|
+
) -> None:
|
|
607
|
+
"""
|
|
608
|
+
Transform GitHub dependency graph manifests into cartography manifest format.
|
|
609
|
+
:param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
|
|
610
|
+
:param repo_url: The URL of the GitHub repo
|
|
611
|
+
:param out_manifests_list: Output array to append transformed results to
|
|
612
|
+
:return: Nothing
|
|
613
|
+
"""
|
|
614
|
+
if not dependency_manifests or not dependency_manifests.get("nodes"):
|
|
615
|
+
return
|
|
616
|
+
|
|
617
|
+
manifests_added = 0
|
|
618
|
+
|
|
619
|
+
for manifest in dependency_manifests["nodes"]:
|
|
620
|
+
blob_path = manifest.get("blobPath", "")
|
|
621
|
+
if not blob_path:
|
|
622
|
+
continue
|
|
623
|
+
|
|
624
|
+
# Count dependencies in this manifest
|
|
625
|
+
dependencies = manifest.get("dependencies", {})
|
|
626
|
+
dependencies_count = len(dependencies.get("nodes", []) if dependencies else [])
|
|
627
|
+
|
|
628
|
+
# Create unique manifest ID by combining repo URL and blob path
|
|
629
|
+
manifest_id = f"{repo_url}#{blob_path}"
|
|
630
|
+
|
|
631
|
+
# Extract filename from blob path
|
|
632
|
+
filename = blob_path.split("/")[-1] if blob_path else "None"
|
|
633
|
+
|
|
634
|
+
out_manifests_list.append(
|
|
635
|
+
{
|
|
636
|
+
"id": manifest_id,
|
|
637
|
+
"blob_path": blob_path,
|
|
638
|
+
"filename": filename,
|
|
639
|
+
"dependencies_count": dependencies_count,
|
|
640
|
+
"repo_url": repo_url,
|
|
641
|
+
}
|
|
642
|
+
)
|
|
643
|
+
manifests_added += 1
|
|
644
|
+
|
|
645
|
+
if manifests_added > 0:
|
|
646
|
+
repo_name = repo_url.split("/")[-1] if repo_url else "repository"
|
|
647
|
+
logger.info(f"Found {manifests_added} dependency manifests in {repo_name}")
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _transform_dependency_graph(
|
|
651
|
+
dependency_manifests: Optional[Dict],
|
|
652
|
+
repo_url: str,
|
|
653
|
+
out_dependencies_list: List[Dict],
|
|
654
|
+
) -> None:
|
|
655
|
+
"""
|
|
656
|
+
Transform GitHub dependency graph manifests into cartography dependency format.
|
|
657
|
+
:param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
|
|
658
|
+
:param repo_url: The URL of the GitHub repo
|
|
659
|
+
:param out_dependencies_list: Output array to append transformed results to
|
|
660
|
+
:return: Nothing
|
|
661
|
+
"""
|
|
662
|
+
if not dependency_manifests or not dependency_manifests.get("nodes"):
|
|
663
|
+
return
|
|
664
|
+
|
|
665
|
+
dependencies_added = 0
|
|
666
|
+
|
|
667
|
+
for manifest in dependency_manifests["nodes"]:
|
|
668
|
+
dependencies = manifest.get("dependencies", {})
|
|
669
|
+
if not dependencies or not dependencies.get("nodes"):
|
|
670
|
+
continue
|
|
671
|
+
|
|
672
|
+
manifest_path = manifest.get("blobPath", "")
|
|
673
|
+
|
|
674
|
+
for dep in dependencies["nodes"]:
|
|
675
|
+
package_name = dep.get("packageName")
|
|
676
|
+
if not package_name:
|
|
677
|
+
continue
|
|
678
|
+
|
|
679
|
+
requirements = dep.get("requirements", "")
|
|
680
|
+
package_manager = dep.get("packageManager", "").upper()
|
|
681
|
+
|
|
682
|
+
# Create ecosystem-specific canonical name
|
|
683
|
+
canonical_name = _canonicalize_dependency_name(
|
|
684
|
+
package_name, package_manager
|
|
685
|
+
)
|
|
686
|
+
|
|
687
|
+
# Create ecosystem identifier
|
|
688
|
+
ecosystem = package_manager.lower() if package_manager else "unknown"
|
|
689
|
+
|
|
690
|
+
# Create simple dependency ID using canonical name and requirements
|
|
691
|
+
# This allows the same dependency to be shared across multiple repos
|
|
692
|
+
requirements_for_id = (requirements or "").strip()
|
|
693
|
+
dependency_id = (
|
|
694
|
+
f"{canonical_name}|{requirements_for_id}"
|
|
695
|
+
if requirements_for_id
|
|
696
|
+
else canonical_name
|
|
697
|
+
)
|
|
698
|
+
|
|
699
|
+
# Normalize requirements field (prefer None over empty string)
|
|
700
|
+
normalized_requirements = requirements if requirements else None
|
|
701
|
+
|
|
702
|
+
# Create manifest ID for the HAS_DEP relationship
|
|
703
|
+
manifest_id = f"{repo_url}#{manifest_path}"
|
|
704
|
+
|
|
705
|
+
out_dependencies_list.append(
|
|
706
|
+
{
|
|
707
|
+
"id": dependency_id,
|
|
708
|
+
"name": canonical_name,
|
|
709
|
+
"original_name": package_name, # Keep original for reference
|
|
710
|
+
"requirements": normalized_requirements,
|
|
711
|
+
"ecosystem": ecosystem,
|
|
712
|
+
"package_manager": package_manager,
|
|
713
|
+
"manifest_path": manifest_path,
|
|
714
|
+
"manifest_id": manifest_id,
|
|
715
|
+
"repo_url": repo_url,
|
|
716
|
+
"manifest_file": (
|
|
717
|
+
manifest_path.split("/")[-1] if manifest_path else ""
|
|
718
|
+
),
|
|
719
|
+
}
|
|
720
|
+
)
|
|
721
|
+
dependencies_added += 1
|
|
722
|
+
|
|
723
|
+
if dependencies_added > 0:
|
|
724
|
+
repo_name = repo_url.split("/")[-1] if repo_url else "repository"
|
|
725
|
+
logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
|
|
729
|
+
"""
|
|
730
|
+
Canonicalize dependency names based on ecosystem conventions.
|
|
731
|
+
"""
|
|
732
|
+
if not name:
|
|
733
|
+
return name
|
|
734
|
+
|
|
735
|
+
# For Python packages, use existing canonicalization
|
|
736
|
+
if package_manager in ["PIP", "CONDA"]:
|
|
737
|
+
try:
|
|
738
|
+
from packaging.utils import canonicalize_name
|
|
739
|
+
|
|
740
|
+
return str(canonicalize_name(name))
|
|
741
|
+
except ImportError:
|
|
742
|
+
# Fallback if packaging not available
|
|
743
|
+
return name.lower().replace("_", "-")
|
|
744
|
+
|
|
745
|
+
# For other ecosystems, use lowercase
|
|
746
|
+
return name.lower()
|
|
747
|
+
|
|
748
|
+
|
|
296
749
|
def _transform_python_requirements(
|
|
297
750
|
requirements_list: List[str],
|
|
298
751
|
repo_url: str,
|
|
@@ -307,8 +760,8 @@ def _transform_python_requirements(
|
|
|
307
760
|
"""
|
|
308
761
|
parsed_list = []
|
|
309
762
|
for line in requirements_list:
|
|
310
|
-
stripped_line = line.partition(
|
|
311
|
-
if stripped_line ==
|
|
763
|
+
stripped_line = line.partition("#")[0].strip()
|
|
764
|
+
if stripped_line == "":
|
|
312
765
|
continue
|
|
313
766
|
try:
|
|
314
767
|
req = Requirement(stripped_line)
|
|
@@ -316,7 +769,7 @@ def _transform_python_requirements(
|
|
|
316
769
|
except InvalidRequirement:
|
|
317
770
|
# INFO and not WARN/ERROR as we intentionally don't support all ways to specify Python requirements
|
|
318
771
|
logger.info(
|
|
319
|
-
f
|
|
772
|
+
f'Failed to parse line "{line}" in repo {repo_url}\'s requirements.txt; skipping line.',
|
|
320
773
|
exc_info=True,
|
|
321
774
|
)
|
|
322
775
|
|
|
@@ -324,32 +777,44 @@ def _transform_python_requirements(
|
|
|
324
777
|
pinned_version = None
|
|
325
778
|
if len(req.specifier) == 1:
|
|
326
779
|
specifier = next(iter(req.specifier))
|
|
327
|
-
if specifier.operator ==
|
|
780
|
+
if specifier.operator == "==":
|
|
328
781
|
pinned_version = specifier.version
|
|
329
782
|
|
|
330
783
|
# Set `spec` to a default value. Example values for str(req.specifier): "<4.0,>=3.0" or "==1.0.0".
|
|
331
784
|
spec: Optional[str] = str(req.specifier)
|
|
332
785
|
# Set spec to `None` instead of empty string so that the Neo4j driver will leave the library.specifier field
|
|
333
786
|
# undefined. As convention, we prefer undefined values over empty strings in the graph.
|
|
334
|
-
if spec ==
|
|
787
|
+
if spec == "":
|
|
335
788
|
spec = None
|
|
336
789
|
|
|
337
790
|
canon_name = canonicalize_name(req.name)
|
|
338
|
-
requirement_id =
|
|
791
|
+
requirement_id = (
|
|
792
|
+
f"{canon_name}|{pinned_version}" if pinned_version else canon_name
|
|
793
|
+
)
|
|
339
794
|
|
|
340
|
-
out_requirements_files.append(
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
795
|
+
out_requirements_files.append(
|
|
796
|
+
{
|
|
797
|
+
"id": requirement_id,
|
|
798
|
+
"name": canon_name,
|
|
799
|
+
"specifier": spec,
|
|
800
|
+
"version": pinned_version,
|
|
801
|
+
"repo_url": repo_url,
|
|
802
|
+
},
|
|
803
|
+
)
|
|
347
804
|
|
|
348
805
|
|
|
349
806
|
def parse_setup_cfg(config: configparser.ConfigParser) -> List[str]:
|
|
350
807
|
reqs: List[str] = []
|
|
351
|
-
reqs.extend(
|
|
352
|
-
|
|
808
|
+
reqs.extend(
|
|
809
|
+
_parse_setup_cfg_requirements(
|
|
810
|
+
config.get("options", "install_requires", fallback=""),
|
|
811
|
+
),
|
|
812
|
+
)
|
|
813
|
+
reqs.extend(
|
|
814
|
+
_parse_setup_cfg_requirements(
|
|
815
|
+
config.get("options", "setup_requires", fallback=""),
|
|
816
|
+
),
|
|
817
|
+
)
|
|
353
818
|
if config.has_section("options.extras_require"):
|
|
354
819
|
for _, val in config.items("options.extras_require"):
|
|
355
820
|
reqs.extend(_parse_setup_cfg_requirements(val))
|
|
@@ -368,7 +833,11 @@ def _parse_setup_cfg_requirements(reqs: str, separator: str = ";") -> List[str]:
|
|
|
368
833
|
|
|
369
834
|
|
|
370
835
|
@timeit
|
|
371
|
-
def load_github_repos(
|
|
836
|
+
def load_github_repos(
|
|
837
|
+
neo4j_session: neo4j.Session,
|
|
838
|
+
update_tag: int,
|
|
839
|
+
repo_data: List[Dict],
|
|
840
|
+
) -> None:
|
|
372
841
|
"""
|
|
373
842
|
Ingest the GitHub repository information
|
|
374
843
|
:param neo4j_session: Neo4J session object for server communication
|
|
@@ -411,15 +880,23 @@ def load_github_repos(neo4j_session: neo4j.Session, update_tag: int, repo_data:
|
|
|
411
880
|
ON CREATE SET r.firstseen = timestamp()
|
|
412
881
|
SET r.lastupdated = r.UpdateTag
|
|
413
882
|
"""
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
883
|
+
|
|
884
|
+
def _ingest_repos_tx(tx: neo4j.Transaction) -> None:
|
|
885
|
+
tx.run(
|
|
886
|
+
ingest_repo,
|
|
887
|
+
RepoData=repo_data,
|
|
888
|
+
UpdateTag=update_tag,
|
|
889
|
+
).consume()
|
|
890
|
+
|
|
891
|
+
execute_write_with_retry(neo4j_session, _ingest_repos_tx)
|
|
419
892
|
|
|
420
893
|
|
|
421
894
|
@timeit
|
|
422
|
-
def load_github_languages(
|
|
895
|
+
def load_github_languages(
|
|
896
|
+
neo4j_session: neo4j.Session,
|
|
897
|
+
update_tag: int,
|
|
898
|
+
repo_languages: List[Dict],
|
|
899
|
+
) -> None:
|
|
423
900
|
"""
|
|
424
901
|
Ingest the relationships for repo languages
|
|
425
902
|
:param neo4j_session: Neo4J session object for server communication
|
|
@@ -441,15 +918,22 @@ def load_github_languages(neo4j_session: neo4j.Session, update_tag: int, repo_la
|
|
|
441
918
|
ON CREATE SET r.firstseen = timestamp()
|
|
442
919
|
SET r.lastupdated = $UpdateTag"""
|
|
443
920
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
921
|
+
def _ingest_languages_tx(tx: neo4j.Transaction) -> None:
|
|
922
|
+
tx.run(
|
|
923
|
+
ingest_languages,
|
|
924
|
+
Languages=repo_languages,
|
|
925
|
+
UpdateTag=update_tag,
|
|
926
|
+
).consume()
|
|
927
|
+
|
|
928
|
+
execute_write_with_retry(neo4j_session, _ingest_languages_tx)
|
|
449
929
|
|
|
450
930
|
|
|
451
931
|
@timeit
|
|
452
|
-
def load_github_owners(
|
|
932
|
+
def load_github_owners(
|
|
933
|
+
neo4j_session: neo4j.Session,
|
|
934
|
+
update_tag: int,
|
|
935
|
+
repo_owners: List[Dict],
|
|
936
|
+
) -> None:
|
|
453
937
|
"""
|
|
454
938
|
Ingest the relationships for repo owners
|
|
455
939
|
:param neo4j_session: Neo4J session object for server communication
|
|
@@ -457,33 +941,55 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
|
|
|
457
941
|
:param repo_owners: list of owner to repo mappings
|
|
458
942
|
:return: Nothing
|
|
459
943
|
"""
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
944
|
+
ingest_owner_template = Template(
|
|
945
|
+
"""
|
|
946
|
+
MERGE (user:$account_type{id: $Id})
|
|
947
|
+
ON CREATE SET user.firstseen = timestamp()
|
|
948
|
+
SET user.username = $UserName,
|
|
949
|
+
user.lastupdated = $UpdateTag
|
|
950
|
+
WITH user
|
|
951
|
+
|
|
952
|
+
MATCH (repo:GitHubRepository{id: $RepoId})
|
|
953
|
+
MERGE (user)<-[r:OWNER]-(repo)
|
|
954
|
+
ON CREATE SET r.firstseen = timestamp()
|
|
955
|
+
SET r.lastupdated = $UpdateTag""",
|
|
956
|
+
)
|
|
957
|
+
|
|
958
|
+
account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
|
|
959
|
+
|
|
960
|
+
def _ingest_owner_tx(
|
|
961
|
+
tx: neo4j.Transaction,
|
|
962
|
+
owner_record: Dict,
|
|
963
|
+
owner_label: str,
|
|
964
|
+
) -> None:
|
|
965
|
+
tx.run(
|
|
966
|
+
ingest_owner_template.safe_substitute(
|
|
967
|
+
account_type=owner_label,
|
|
968
|
+
),
|
|
969
|
+
Id=owner_record["owner_id"],
|
|
970
|
+
UserName=owner_record["owner"],
|
|
971
|
+
RepoId=owner_record["repo_id"],
|
|
480
972
|
UpdateTag=update_tag,
|
|
973
|
+
).consume()
|
|
974
|
+
|
|
975
|
+
for owner in repo_owners:
|
|
976
|
+
execute_write_with_retry(
|
|
977
|
+
neo4j_session,
|
|
978
|
+
_ingest_owner_tx,
|
|
979
|
+
owner,
|
|
980
|
+
account_type[owner["type"]],
|
|
481
981
|
)
|
|
482
982
|
|
|
483
983
|
|
|
484
984
|
@timeit
|
|
485
|
-
def load_collaborators(
|
|
486
|
-
|
|
985
|
+
def load_collaborators(
|
|
986
|
+
neo4j_session: neo4j.Session,
|
|
987
|
+
update_tag: int,
|
|
988
|
+
collaborators: Dict,
|
|
989
|
+
affiliation: str,
|
|
990
|
+
) -> None:
|
|
991
|
+
query = Template(
|
|
992
|
+
"""
|
|
487
993
|
UNWIND $UserData as user
|
|
488
994
|
|
|
489
995
|
MERGE (u:GitHubUser{id: user.url})
|
|
@@ -500,27 +1006,36 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
|
|
|
500
1006
|
MERGE (repo)<-[o:$rel_label]-(u)
|
|
501
1007
|
ON CREATE SET o.firstseen = timestamp()
|
|
502
1008
|
SET o.lastupdated = $UpdateTag
|
|
503
|
-
"""
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
1009
|
+
""",
|
|
1010
|
+
)
|
|
1011
|
+
|
|
1012
|
+
def _ingest_collaborators_tx(
|
|
1013
|
+
tx: neo4j.Transaction,
|
|
1014
|
+
relationship_label: str,
|
|
1015
|
+
collaborator_data: List[Dict],
|
|
1016
|
+
) -> None:
|
|
1017
|
+
tx.run(
|
|
507
1018
|
query.safe_substitute(rel_label=relationship_label),
|
|
508
|
-
UserData=
|
|
1019
|
+
UserData=collaborator_data,
|
|
509
1020
|
UpdateTag=update_tag,
|
|
1021
|
+
).consume()
|
|
1022
|
+
|
|
1023
|
+
for collab_type, collab_data in collaborators.items():
|
|
1024
|
+
relationship_label = f"{affiliation}_COLLAB_{collab_type}"
|
|
1025
|
+
execute_write_with_retry(
|
|
1026
|
+
neo4j_session,
|
|
1027
|
+
_ingest_collaborators_tx,
|
|
1028
|
+
relationship_label,
|
|
1029
|
+
collab_data,
|
|
510
1030
|
)
|
|
511
1031
|
|
|
512
1032
|
|
|
513
1033
|
@timeit
|
|
514
|
-
def
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements'])
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
@timeit
|
|
523
|
-
def load_python_requirements(neo4j_session: neo4j.Session, update_tag: int, requirements_objects: List[Dict]) -> None:
|
|
1034
|
+
def load_python_requirements(
|
|
1035
|
+
neo4j_session: neo4j.Session,
|
|
1036
|
+
update_tag: int,
|
|
1037
|
+
requirements_objects: List[Dict],
|
|
1038
|
+
) -> None:
|
|
524
1039
|
query = """
|
|
525
1040
|
UNWIND $Requirements AS req
|
|
526
1041
|
MERGE (lib:PythonLibrary:Dependency{id: req.id})
|
|
@@ -536,19 +1051,175 @@ def load_python_requirements(neo4j_session: neo4j.Session, update_tag: int, requ
|
|
|
536
1051
|
SET r.lastupdated = $UpdateTag,
|
|
537
1052
|
r.specifier = req.specifier
|
|
538
1053
|
"""
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
1054
|
+
|
|
1055
|
+
def _ingest_requirements_tx(tx: neo4j.Transaction) -> None:
|
|
1056
|
+
tx.run(
|
|
1057
|
+
query,
|
|
1058
|
+
Requirements=requirements_objects,
|
|
1059
|
+
UpdateTag=update_tag,
|
|
1060
|
+
).consume()
|
|
1061
|
+
|
|
1062
|
+
execute_write_with_retry(neo4j_session, _ingest_requirements_tx)
|
|
1063
|
+
|
|
1064
|
+
|
|
1065
|
+
@timeit
|
|
1066
|
+
def load_github_dependencies(
|
|
1067
|
+
neo4j_session: neo4j.Session,
|
|
1068
|
+
update_tag: int,
|
|
1069
|
+
dependencies: List[Dict],
|
|
1070
|
+
) -> None:
|
|
1071
|
+
"""
|
|
1072
|
+
Ingest GitHub dependency data into Neo4j using the new data model
|
|
1073
|
+
:param neo4j_session: Neo4J session object for server communication
|
|
1074
|
+
:param update_tag: Timestamp used to determine data freshness
|
|
1075
|
+
:param dependencies: List of dependency objects from GitHub's dependency graph
|
|
1076
|
+
:return: Nothing
|
|
1077
|
+
"""
|
|
1078
|
+
# Group dependencies by both repo_url and manifest_id for schema-based loading
|
|
1079
|
+
dependencies_by_repo_and_manifest = defaultdict(list)
|
|
1080
|
+
|
|
1081
|
+
for dep in dependencies:
|
|
1082
|
+
repo_url = dep["repo_url"]
|
|
1083
|
+
manifest_id = dep["manifest_id"]
|
|
1084
|
+
# Create a key combining both repo_url and manifest_id
|
|
1085
|
+
group_key = (repo_url, manifest_id)
|
|
1086
|
+
# Remove repo_url and manifest_id from the dependency object since we'll pass them as kwargs
|
|
1087
|
+
dep_without_kwargs = {
|
|
1088
|
+
k: v for k, v in dep.items() if k not in ["repo_url", "manifest_id"]
|
|
1089
|
+
}
|
|
1090
|
+
dependencies_by_repo_and_manifest[group_key].append(dep_without_kwargs)
|
|
1091
|
+
|
|
1092
|
+
# Load dependencies for each repository/manifest combination separately
|
|
1093
|
+
for (
|
|
1094
|
+
repo_url,
|
|
1095
|
+
manifest_id,
|
|
1096
|
+
), group_dependencies in dependencies_by_repo_and_manifest.items():
|
|
1097
|
+
load_data(
|
|
1098
|
+
neo4j_session,
|
|
1099
|
+
GitHubDependencySchema(),
|
|
1100
|
+
group_dependencies,
|
|
1101
|
+
lastupdated=update_tag,
|
|
1102
|
+
repo_url=repo_url,
|
|
1103
|
+
manifest_id=manifest_id,
|
|
1104
|
+
)
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
@timeit
|
|
1108
|
+
def load_github_dependency_manifests(
|
|
1109
|
+
neo4j_session: neo4j.Session,
|
|
1110
|
+
update_tag: int,
|
|
1111
|
+
manifests: List[Dict],
|
|
1112
|
+
) -> None:
|
|
1113
|
+
"""
|
|
1114
|
+
Ingest GitHub dependency manifests into Neo4j
|
|
1115
|
+
"""
|
|
1116
|
+
manifests_by_repo = defaultdict(list)
|
|
1117
|
+
|
|
1118
|
+
for manifest in manifests:
|
|
1119
|
+
repo_url = manifest["repo_url"]
|
|
1120
|
+
manifests_by_repo[repo_url].append(manifest)
|
|
1121
|
+
|
|
1122
|
+
# Load manifests for each repository separately
|
|
1123
|
+
for repo_url, repo_manifests in manifests_by_repo.items():
|
|
1124
|
+
load_data(
|
|
1125
|
+
neo4j_session,
|
|
1126
|
+
DependencyGraphManifestSchema(),
|
|
1127
|
+
repo_manifests,
|
|
1128
|
+
lastupdated=update_tag,
|
|
1129
|
+
repo_url=repo_url,
|
|
1130
|
+
)
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
@timeit
|
|
1134
|
+
def cleanup_github_dependencies(
|
|
1135
|
+
neo4j_session: neo4j.Session,
|
|
1136
|
+
common_job_parameters: Dict[str, Any],
|
|
1137
|
+
repo_urls: List[str],
|
|
1138
|
+
) -> None:
|
|
1139
|
+
# Run cleanup for each repository separately
|
|
1140
|
+
for repo_url in repo_urls:
|
|
1141
|
+
cleanup_params = {**common_job_parameters, "repo_url": repo_url}
|
|
1142
|
+
GraphJob.from_node_schema(GitHubDependencySchema(), cleanup_params).run(
|
|
1143
|
+
neo4j_session
|
|
1144
|
+
)
|
|
1145
|
+
|
|
1146
|
+
|
|
1147
|
+
@timeit
|
|
1148
|
+
def cleanup_github_manifests(
|
|
1149
|
+
neo4j_session: neo4j.Session,
|
|
1150
|
+
common_job_parameters: Dict[str, Any],
|
|
1151
|
+
repo_urls: List[str],
|
|
1152
|
+
) -> None:
|
|
1153
|
+
"""
|
|
1154
|
+
Delete GitHub dependency manifests and their relationships from the graph if they were not updated in the last sync.
|
|
1155
|
+
:param neo4j_session: Neo4j session
|
|
1156
|
+
:param common_job_parameters: Common job parameters containing UPDATE_TAG
|
|
1157
|
+
:param repo_urls: List of repository URLs to clean up manifests for
|
|
1158
|
+
"""
|
|
1159
|
+
# Run cleanup for each repository separately
|
|
1160
|
+
for repo_url in repo_urls:
|
|
1161
|
+
cleanup_params = {**common_job_parameters, "repo_url": repo_url}
|
|
1162
|
+
GraphJob.from_node_schema(DependencyGraphManifestSchema(), cleanup_params).run(
|
|
1163
|
+
neo4j_session
|
|
1164
|
+
)
|
|
1165
|
+
|
|
1166
|
+
|
|
1167
|
+
@timeit
|
|
1168
|
+
def load(
|
|
1169
|
+
neo4j_session: neo4j.Session,
|
|
1170
|
+
common_job_parameters: Dict,
|
|
1171
|
+
repo_data: Dict,
|
|
1172
|
+
) -> None:
|
|
1173
|
+
load_github_repos(
|
|
1174
|
+
neo4j_session,
|
|
1175
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1176
|
+
repo_data["repos"],
|
|
1177
|
+
)
|
|
1178
|
+
load_github_owners(
|
|
1179
|
+
neo4j_session,
|
|
1180
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1181
|
+
repo_data["repo_owners"],
|
|
1182
|
+
)
|
|
1183
|
+
load_github_languages(
|
|
1184
|
+
neo4j_session,
|
|
1185
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1186
|
+
repo_data["repo_languages"],
|
|
1187
|
+
)
|
|
1188
|
+
load_collaborators(
|
|
1189
|
+
neo4j_session,
|
|
1190
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1191
|
+
repo_data["repo_direct_collaborators"],
|
|
1192
|
+
"DIRECT",
|
|
1193
|
+
)
|
|
1194
|
+
load_collaborators(
|
|
1195
|
+
neo4j_session,
|
|
1196
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1197
|
+
repo_data["repo_outside_collaborators"],
|
|
1198
|
+
"OUTSIDE",
|
|
1199
|
+
)
|
|
1200
|
+
load_python_requirements(
|
|
1201
|
+
neo4j_session,
|
|
1202
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1203
|
+
repo_data["python_requirements"],
|
|
1204
|
+
)
|
|
1205
|
+
load_github_dependency_manifests(
|
|
1206
|
+
neo4j_session,
|
|
1207
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1208
|
+
repo_data["manifests"],
|
|
1209
|
+
)
|
|
1210
|
+
load_github_dependencies(
|
|
1211
|
+
neo4j_session,
|
|
1212
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1213
|
+
repo_data["dependencies"],
|
|
543
1214
|
)
|
|
544
1215
|
|
|
545
1216
|
|
|
546
1217
|
def sync(
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
1218
|
+
neo4j_session: neo4j.Session,
|
|
1219
|
+
common_job_parameters: Dict[str, Any],
|
|
1220
|
+
github_api_key: str,
|
|
1221
|
+
github_url: str,
|
|
1222
|
+
organization: str,
|
|
552
1223
|
) -> None:
|
|
553
1224
|
"""
|
|
554
1225
|
Performs the sequential tasks to collect, transform, and sync github data
|
|
@@ -561,6 +1232,46 @@ def sync(
|
|
|
561
1232
|
"""
|
|
562
1233
|
logger.info("Syncing GitHub repos")
|
|
563
1234
|
repos_json = get(github_api_key, github_url, organization)
|
|
564
|
-
|
|
1235
|
+
direct_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
1236
|
+
outside_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
1237
|
+
try:
|
|
1238
|
+
direct_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
1239
|
+
repos_json,
|
|
1240
|
+
"DIRECT",
|
|
1241
|
+
organization,
|
|
1242
|
+
github_url,
|
|
1243
|
+
github_api_key,
|
|
1244
|
+
)
|
|
1245
|
+
outside_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
1246
|
+
repos_json,
|
|
1247
|
+
"OUTSIDE",
|
|
1248
|
+
organization,
|
|
1249
|
+
github_url,
|
|
1250
|
+
github_api_key,
|
|
1251
|
+
)
|
|
1252
|
+
except TypeError:
|
|
1253
|
+
# due to permission errors or transient network error or some other nonsense
|
|
1254
|
+
logger.warning(
|
|
1255
|
+
"Unable to list repo collaborators due to permission errors; continuing on.",
|
|
1256
|
+
exc_info=True,
|
|
1257
|
+
)
|
|
1258
|
+
repo_data = transform(repos_json, direct_collabs, outside_collabs)
|
|
565
1259
|
load(neo4j_session, common_job_parameters, repo_data)
|
|
566
|
-
|
|
1260
|
+
|
|
1261
|
+
# Collect repository URLs that have dependencies for cleanup
|
|
1262
|
+
repo_urls_with_dependencies = list(
|
|
1263
|
+
{dep["repo_url"] for dep in repo_data["dependencies"]}
|
|
1264
|
+
)
|
|
1265
|
+
cleanup_github_dependencies(
|
|
1266
|
+
neo4j_session, common_job_parameters, repo_urls_with_dependencies
|
|
1267
|
+
)
|
|
1268
|
+
|
|
1269
|
+
# Collect repository URLs that have manifests for cleanup
|
|
1270
|
+
repo_urls_with_manifests = list(
|
|
1271
|
+
{manifest["repo_url"] for manifest in repo_data["manifests"]}
|
|
1272
|
+
)
|
|
1273
|
+
cleanup_github_manifests(
|
|
1274
|
+
neo4j_session, common_job_parameters, repo_urls_with_manifests
|
|
1275
|
+
)
|
|
1276
|
+
|
|
1277
|
+
run_cleanup_job("github_repos_cleanup.json", neo4j_session, common_job_parameters)
|