cartography 0.104.0rc2__py3-none-any.whl → 0.123.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (642) hide show
  1. cartography/_version.py +16 -3
  2. cartography/cli.py +466 -5
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/client/core/tx.py +357 -8
  6. cartography/config.py +153 -0
  7. cartography/data/azure_permission_relationships.yaml +20 -0
  8. cartography/data/gcp_permission_relationships.yaml +21 -0
  9. cartography/data/indexes.cypher +0 -186
  10. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  11. cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
  12. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  13. cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
  14. cartography/driftdetect/cli.py +3 -2
  15. cartography/graph/cleanupbuilder.py +198 -41
  16. cartography/graph/job.py +54 -6
  17. cartography/graph/querybuilder.py +528 -27
  18. cartography/graph/statement.py +5 -1
  19. cartography/intel/airbyte/__init__.py +105 -0
  20. cartography/intel/airbyte/connections.py +120 -0
  21. cartography/intel/airbyte/destinations.py +81 -0
  22. cartography/intel/airbyte/organizations.py +59 -0
  23. cartography/intel/airbyte/sources.py +78 -0
  24. cartography/intel/airbyte/tags.py +64 -0
  25. cartography/intel/airbyte/users.py +106 -0
  26. cartography/intel/airbyte/util.py +122 -0
  27. cartography/intel/airbyte/workspaces.py +63 -0
  28. cartography/intel/aws/__init__.py +24 -9
  29. cartography/intel/aws/acm.py +124 -0
  30. cartography/intel/aws/apigateway.py +253 -22
  31. cartography/intel/aws/apigatewayv2.py +116 -0
  32. cartography/intel/aws/cloudtrail.py +17 -39
  33. cartography/intel/aws/cloudtrail_management_events.py +962 -0
  34. cartography/intel/aws/cloudwatch.py +150 -4
  35. cartography/intel/aws/codebuild.py +132 -0
  36. cartography/intel/aws/cognito.py +201 -0
  37. cartography/intel/aws/config.py +7 -3
  38. cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
  39. cartography/intel/aws/ec2/instances.py +25 -1
  40. cartography/intel/aws/ec2/internet_gateways.py +4 -2
  41. cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
  42. cartography/intel/aws/ec2/network_interfaces.py +5 -1
  43. cartography/intel/aws/ec2/reserved_instances.py +3 -1
  44. cartography/intel/aws/ec2/security_groups.py +140 -122
  45. cartography/intel/aws/ec2/snapshots.py +47 -84
  46. cartography/intel/aws/ec2/subnets.py +37 -63
  47. cartography/intel/aws/ec2/tgw.py +11 -5
  48. cartography/intel/aws/ec2/volumes.py +1 -1
  49. cartography/intel/aws/ec2/vpc.py +140 -124
  50. cartography/intel/aws/ec2/vpc_peerings.py +262 -125
  51. cartography/intel/aws/ecr.py +269 -98
  52. cartography/intel/aws/ecr_image_layers.py +923 -0
  53. cartography/intel/aws/ecs.py +251 -380
  54. cartography/intel/aws/efs.py +179 -11
  55. cartography/intel/aws/elasticache.py +102 -79
  56. cartography/intel/aws/elasticsearch.py +13 -4
  57. cartography/intel/aws/eventbridge.py +164 -0
  58. cartography/intel/aws/glue.py +181 -0
  59. cartography/intel/aws/guardduty.py +443 -0
  60. cartography/intel/aws/iam.py +750 -493
  61. cartography/intel/aws/identitycenter.py +605 -83
  62. cartography/intel/aws/inspector.py +221 -105
  63. cartography/intel/aws/kms.py +173 -201
  64. cartography/intel/aws/lambda_function.py +272 -189
  65. cartography/intel/aws/organizations.py +10 -9
  66. cartography/intel/aws/permission_relationships.py +10 -20
  67. cartography/intel/aws/rds.py +337 -446
  68. cartography/intel/aws/redshift.py +9 -4
  69. cartography/intel/aws/resourcegroupstaggingapi.py +78 -19
  70. cartography/intel/aws/resources.py +18 -0
  71. cartography/intel/aws/route53.py +386 -332
  72. cartography/intel/aws/s3.py +322 -14
  73. cartography/intel/aws/secretsmanager.py +81 -49
  74. cartography/intel/aws/securityhub.py +3 -1
  75. cartography/intel/aws/sns.py +62 -2
  76. cartography/intel/aws/sqs.py +36 -90
  77. cartography/intel/aws/ssm.py +3 -5
  78. cartography/intel/azure/__init__.py +202 -48
  79. cartography/intel/azure/aks.py +175 -0
  80. cartography/intel/azure/app_service.py +105 -0
  81. cartography/intel/azure/compute.py +59 -112
  82. cartography/intel/azure/container_instances.py +95 -0
  83. cartography/intel/azure/cosmosdb.py +222 -361
  84. cartography/intel/azure/data_factory.py +85 -0
  85. cartography/intel/azure/data_factory_dataset.py +128 -0
  86. cartography/intel/azure/data_factory_linked_service.py +119 -0
  87. cartography/intel/azure/data_factory_pipeline.py +142 -0
  88. cartography/intel/azure/data_lake.py +124 -0
  89. cartography/intel/azure/event_grid.py +94 -0
  90. cartography/intel/azure/functions.py +124 -0
  91. cartography/intel/azure/load_balancers.py +263 -0
  92. cartography/intel/azure/logic_apps.py +101 -0
  93. cartography/intel/azure/monitor.py +105 -0
  94. cartography/intel/azure/network.py +467 -0
  95. cartography/intel/azure/permission_relationships.py +466 -0
  96. cartography/intel/azure/rbac.py +309 -0
  97. cartography/intel/azure/resource_groups.py +82 -0
  98. cartography/intel/azure/security_center.py +106 -0
  99. cartography/intel/azure/sql.py +145 -292
  100. cartography/intel/azure/storage.py +185 -262
  101. cartography/intel/azure/subscription.py +21 -43
  102. cartography/intel/azure/tenant.py +39 -30
  103. cartography/intel/azure/util/common.py +13 -0
  104. cartography/intel/azure/util/credentials.py +49 -174
  105. cartography/intel/azure/util/tag.py +41 -0
  106. cartography/intel/create_indexes.py +2 -1
  107. cartography/intel/crowdstrike/spotlight.py +5 -2
  108. cartography/intel/dns.py +5 -2
  109. cartography/intel/entra/__init__.py +100 -1
  110. cartography/intel/entra/app_role_assignments.py +284 -0
  111. cartography/intel/entra/applications.py +182 -0
  112. cartography/intel/entra/federation/__init__.py +0 -0
  113. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  114. cartography/intel/entra/groups.py +198 -0
  115. cartography/intel/entra/ou.py +48 -24
  116. cartography/intel/entra/service_principals.py +217 -0
  117. cartography/intel/entra/users.py +105 -57
  118. cartography/intel/gcp/__init__.py +334 -396
  119. cartography/intel/gcp/bigtable_app_profile.py +101 -0
  120. cartography/intel/gcp/bigtable_backup.py +91 -0
  121. cartography/intel/gcp/bigtable_cluster.py +93 -0
  122. cartography/intel/gcp/bigtable_instance.py +86 -0
  123. cartography/intel/gcp/bigtable_table.py +87 -0
  124. cartography/intel/gcp/cai.py +292 -0
  125. cartography/intel/gcp/clients.py +112 -0
  126. cartography/intel/gcp/compute.py +128 -119
  127. cartography/intel/gcp/crm/__init__.py +0 -0
  128. cartography/intel/gcp/crm/folders.py +114 -0
  129. cartography/intel/gcp/crm/orgs.py +70 -0
  130. cartography/intel/gcp/crm/projects.py +120 -0
  131. cartography/intel/gcp/dns.py +83 -169
  132. cartography/intel/gcp/gke.py +72 -113
  133. cartography/intel/gcp/iam.py +111 -91
  134. cartography/intel/gcp/permission_relationships.py +394 -0
  135. cartography/intel/gcp/policy_bindings.py +225 -0
  136. cartography/intel/gcp/storage.py +75 -159
  137. cartography/intel/github/__init__.py +62 -25
  138. cartography/intel/github/commits.py +423 -0
  139. cartography/intel/github/repos.py +463 -85
  140. cartography/intel/github/teams.py +3 -3
  141. cartography/intel/github/users.py +5 -0
  142. cartography/intel/github/util.py +12 -0
  143. cartography/intel/googleworkspace/__init__.py +193 -0
  144. cartography/intel/googleworkspace/devices.py +254 -0
  145. cartography/intel/googleworkspace/groups.py +568 -0
  146. cartography/intel/googleworkspace/oauth_apps.py +259 -0
  147. cartography/intel/googleworkspace/tenant.py +85 -0
  148. cartography/intel/googleworkspace/users.py +138 -0
  149. cartography/intel/gsuite/__init__.py +17 -9
  150. cartography/intel/gsuite/groups.py +291 -0
  151. cartography/intel/gsuite/users.py +142 -0
  152. cartography/intel/jamf/computers.py +7 -1
  153. cartography/intel/keycloak/__init__.py +153 -0
  154. cartography/intel/keycloak/authenticationexecutions.py +322 -0
  155. cartography/intel/keycloak/authenticationflows.py +77 -0
  156. cartography/intel/keycloak/clients.py +187 -0
  157. cartography/intel/keycloak/groups.py +126 -0
  158. cartography/intel/keycloak/identityproviders.py +94 -0
  159. cartography/intel/keycloak/organizations.py +163 -0
  160. cartography/intel/keycloak/realms.py +61 -0
  161. cartography/intel/keycloak/roles.py +202 -0
  162. cartography/intel/keycloak/scopes.py +73 -0
  163. cartography/intel/keycloak/users.py +70 -0
  164. cartography/intel/keycloak/util.py +47 -0
  165. cartography/intel/kubernetes/__init__.py +60 -14
  166. cartography/intel/kubernetes/clusters.py +86 -0
  167. cartography/intel/kubernetes/eks.py +402 -0
  168. cartography/intel/kubernetes/namespaces.py +59 -57
  169. cartography/intel/kubernetes/pods.py +168 -75
  170. cartography/intel/kubernetes/rbac.py +597 -0
  171. cartography/intel/kubernetes/secrets.py +95 -45
  172. cartography/intel/kubernetes/services.py +131 -67
  173. cartography/intel/kubernetes/util.py +142 -14
  174. cartography/intel/oci/iam.py +23 -9
  175. cartography/intel/oci/organizations.py +3 -1
  176. cartography/intel/oci/utils.py +28 -5
  177. cartography/intel/okta/applications.py +15 -5
  178. cartography/intel/okta/awssaml.py +14 -10
  179. cartography/intel/okta/factors.py +3 -1
  180. cartography/intel/okta/groups.py +5 -2
  181. cartography/intel/okta/organization.py +3 -1
  182. cartography/intel/okta/origins.py +3 -1
  183. cartography/intel/okta/roles.py +5 -2
  184. cartography/intel/okta/users.py +10 -2
  185. cartography/intel/ontology/__init__.py +44 -0
  186. cartography/intel/ontology/devices.py +54 -0
  187. cartography/intel/ontology/users.py +54 -0
  188. cartography/intel/ontology/utils.py +176 -0
  189. cartography/intel/pagerduty/escalation_policies.py +13 -6
  190. cartography/intel/pagerduty/schedules.py +9 -4
  191. cartography/intel/pagerduty/services.py +7 -3
  192. cartography/intel/pagerduty/teams.py +5 -2
  193. cartography/intel/pagerduty/users.py +3 -1
  194. cartography/intel/pagerduty/vendors.py +3 -1
  195. cartography/intel/scaleway/__init__.py +127 -0
  196. cartography/intel/scaleway/iam/__init__.py +0 -0
  197. cartography/intel/scaleway/iam/apikeys.py +71 -0
  198. cartography/intel/scaleway/iam/applications.py +71 -0
  199. cartography/intel/scaleway/iam/groups.py +71 -0
  200. cartography/intel/scaleway/iam/users.py +71 -0
  201. cartography/intel/scaleway/instances/__init__.py +0 -0
  202. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  203. cartography/intel/scaleway/instances/instances.py +92 -0
  204. cartography/intel/scaleway/projects.py +79 -0
  205. cartography/intel/scaleway/storage/__init__.py +0 -0
  206. cartography/intel/scaleway/storage/snapshots.py +86 -0
  207. cartography/intel/scaleway/storage/volumes.py +84 -0
  208. cartography/intel/scaleway/utils.py +37 -0
  209. cartography/intel/sentinelone/__init__.py +75 -0
  210. cartography/intel/sentinelone/account.py +140 -0
  211. cartography/intel/sentinelone/agent.py +139 -0
  212. cartography/intel/sentinelone/api.py +124 -0
  213. cartography/intel/sentinelone/application.py +248 -0
  214. cartography/intel/sentinelone/cve.py +119 -0
  215. cartography/intel/sentinelone/utils.py +28 -0
  216. cartography/intel/slack/__init__.py +78 -0
  217. cartography/intel/slack/channels.py +80 -0
  218. cartography/intel/slack/groups.py +90 -0
  219. cartography/intel/slack/teams.py +65 -0
  220. cartography/intel/slack/users.py +57 -0
  221. cartography/intel/slack/utils.py +29 -0
  222. cartography/intel/spacelift/__init__.py +161 -0
  223. cartography/intel/spacelift/account.py +73 -0
  224. cartography/intel/spacelift/ec2_ownership.py +280 -0
  225. cartography/intel/spacelift/runs.py +463 -0
  226. cartography/intel/spacelift/spaces.py +112 -0
  227. cartography/intel/spacelift/stacks.py +119 -0
  228. cartography/intel/spacelift/util.py +122 -0
  229. cartography/intel/spacelift/workerpools.py +131 -0
  230. cartography/intel/spacelift/workers.py +128 -0
  231. cartography/intel/trivy/__init__.py +272 -0
  232. cartography/intel/trivy/scanner.py +386 -0
  233. cartography/models/airbyte/__init__.py +0 -0
  234. cartography/models/airbyte/connection.py +138 -0
  235. cartography/models/airbyte/destination.py +75 -0
  236. cartography/models/airbyte/organization.py +19 -0
  237. cartography/models/airbyte/source.py +75 -0
  238. cartography/models/airbyte/stream.py +74 -0
  239. cartography/models/airbyte/tag.py +69 -0
  240. cartography/models/airbyte/user.py +115 -0
  241. cartography/models/airbyte/workspace.py +46 -0
  242. cartography/models/anthropic/apikey.py +4 -0
  243. cartography/models/anthropic/user.py +4 -0
  244. cartography/models/aws/acm/__init__.py +0 -0
  245. cartography/models/aws/acm/certificate.py +75 -0
  246. cartography/models/aws/apigateway/__init__.py +0 -0
  247. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  248. cartography/models/aws/apigateway/apigatewayintegration.py +79 -0
  249. cartography/models/aws/apigateway/apigatewaymethod.py +74 -0
  250. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  251. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  252. cartography/models/aws/cloudtrail/management_events.py +153 -0
  253. cartography/models/aws/cloudtrail/trail.py +45 -0
  254. cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
  255. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  256. cartography/models/aws/codebuild/__init__.py +0 -0
  257. cartography/models/aws/codebuild/project.py +49 -0
  258. cartography/models/aws/cognito/__init__.py +0 -0
  259. cartography/models/aws/cognito/identity_pool.py +70 -0
  260. cartography/models/aws/cognito/user_pool.py +47 -0
  261. cartography/models/aws/dynamodb/tables.py +2 -0
  262. cartography/models/aws/ec2/instances.py +25 -1
  263. cartography/models/aws/ec2/networkinterfaces.py +4 -0
  264. cartography/models/aws/ec2/security_group_rules.py +109 -0
  265. cartography/models/aws/ec2/security_groups.py +90 -0
  266. cartography/models/aws/ec2/snapshots.py +58 -0
  267. cartography/models/aws/ec2/subnet_instance.py +2 -0
  268. cartography/models/aws/ec2/subnet_networkinterface.py +2 -0
  269. cartography/models/aws/ec2/subnets.py +65 -0
  270. cartography/models/aws/ec2/volumes.py +20 -0
  271. cartography/models/aws/ec2/vpc.py +46 -0
  272. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  273. cartography/models/aws/ec2/vpc_peering.py +157 -0
  274. cartography/models/aws/ecr/__init__.py +0 -0
  275. cartography/models/aws/ecr/image.py +146 -0
  276. cartography/models/aws/ecr/image_layer.py +107 -0
  277. cartography/models/aws/ecr/repository.py +72 -0
  278. cartography/models/aws/ecr/repository_image.py +95 -0
  279. cartography/models/aws/ecs/__init__.py +0 -0
  280. cartography/models/aws/ecs/clusters.py +64 -0
  281. cartography/models/aws/ecs/container_definitions.py +93 -0
  282. cartography/models/aws/ecs/container_instances.py +84 -0
  283. cartography/models/aws/ecs/containers.py +101 -0
  284. cartography/models/aws/ecs/services.py +134 -0
  285. cartography/models/aws/ecs/task_definitions.py +135 -0
  286. cartography/models/aws/ecs/tasks.py +134 -0
  287. cartography/models/aws/efs/access_point.py +77 -0
  288. cartography/models/aws/efs/file_system.py +60 -0
  289. cartography/models/aws/efs/mount_target.py +29 -2
  290. cartography/models/aws/elasticache/__init__.py +0 -0
  291. cartography/models/aws/elasticache/cluster.py +65 -0
  292. cartography/models/aws/elasticache/topic.py +67 -0
  293. cartography/models/aws/eventbridge/__init__.py +0 -0
  294. cartography/models/aws/eventbridge/rule.py +77 -0
  295. cartography/models/aws/eventbridge/target.py +71 -0
  296. cartography/models/aws/glue/__init__.py +0 -0
  297. cartography/models/aws/glue/connection.py +51 -0
  298. cartography/models/aws/glue/job.py +69 -0
  299. cartography/models/aws/guardduty/__init__.py +1 -0
  300. cartography/models/aws/guardduty/detectors.py +50 -0
  301. cartography/models/aws/guardduty/findings.py +121 -0
  302. cartography/models/aws/iam/access_key.py +103 -0
  303. cartography/models/aws/iam/account_role.py +24 -0
  304. cartography/models/aws/iam/federated_principal.py +60 -0
  305. cartography/models/aws/iam/group.py +60 -0
  306. cartography/models/aws/iam/group_membership.py +27 -0
  307. cartography/models/aws/iam/inline_policy.py +78 -0
  308. cartography/models/aws/iam/managed_policy.py +51 -0
  309. cartography/models/aws/iam/policy_statement.py +57 -0
  310. cartography/models/aws/iam/role.py +83 -0
  311. cartography/models/aws/iam/root_principal.py +52 -0
  312. cartography/models/aws/iam/service_principal.py +30 -0
  313. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  314. cartography/models/aws/iam/user.py +59 -0
  315. cartography/models/aws/identitycenter/awsidentitycenter.py +1 -0
  316. cartography/models/aws/identitycenter/awspermissionset.py +70 -0
  317. cartography/models/aws/identitycenter/awssogroup.py +70 -0
  318. cartography/models/aws/identitycenter/awsssouser.py +49 -9
  319. cartography/models/aws/inspector/findings.py +37 -0
  320. cartography/models/aws/inspector/packages.py +1 -31
  321. cartography/models/aws/kms/__init__.py +0 -0
  322. cartography/models/aws/kms/aliases.py +86 -0
  323. cartography/models/aws/kms/grants.py +65 -0
  324. cartography/models/aws/kms/keys.py +88 -0
  325. cartography/models/aws/lambda_function/__init__.py +0 -0
  326. cartography/models/aws/lambda_function/alias.py +74 -0
  327. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  328. cartography/models/aws/lambda_function/lambda_function.py +91 -0
  329. cartography/models/aws/lambda_function/layer.py +72 -0
  330. cartography/models/aws/rds/__init__.py +0 -0
  331. cartography/models/aws/rds/cluster.py +91 -0
  332. cartography/models/aws/rds/event_subscription.py +146 -0
  333. cartography/models/aws/rds/instance.py +156 -0
  334. cartography/models/aws/rds/snapshot.py +108 -0
  335. cartography/models/aws/rds/subnet_group.py +101 -0
  336. cartography/models/aws/route53/__init__.py +0 -0
  337. cartography/models/aws/route53/dnsrecord.py +235 -0
  338. cartography/models/aws/route53/nameserver.py +63 -0
  339. cartography/models/aws/route53/subzone.py +40 -0
  340. cartography/models/aws/route53/zone.py +47 -0
  341. cartography/models/aws/s3/notification.py +24 -0
  342. cartography/models/aws/secretsmanager/secret.py +106 -0
  343. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  344. cartography/models/aws/sns/topic_subscription.py +74 -0
  345. cartography/models/aws/sqs/__init__.py +0 -0
  346. cartography/models/aws/sqs/queue.py +89 -0
  347. cartography/models/azure/__init__.py +0 -0
  348. cartography/models/azure/aks_cluster.py +54 -0
  349. cartography/models/azure/aks_nodepool.py +54 -0
  350. cartography/models/azure/app_service.py +59 -0
  351. cartography/models/azure/container_instance.py +57 -0
  352. cartography/models/azure/cosmosdb/__init__.py +0 -0
  353. cartography/models/azure/cosmosdb/account.py +77 -0
  354. cartography/models/azure/cosmosdb/accountfailoverpolicy.py +77 -0
  355. cartography/models/azure/cosmosdb/cassandrakeyspace.py +82 -0
  356. cartography/models/azure/cosmosdb/cassandratable.py +81 -0
  357. cartography/models/azure/cosmosdb/corspolicy.py +74 -0
  358. cartography/models/azure/cosmosdb/dblocation.py +120 -0
  359. cartography/models/azure/cosmosdb/mongodbcollection.py +82 -0
  360. cartography/models/azure/cosmosdb/mongodbdatabase.py +78 -0
  361. cartography/models/azure/cosmosdb/privateendpointconnection.py +81 -0
  362. cartography/models/azure/cosmosdb/sqlcontainer.py +88 -0
  363. cartography/models/azure/cosmosdb/sqldatabase.py +78 -0
  364. cartography/models/azure/cosmosdb/tableresource.py +76 -0
  365. cartography/models/azure/cosmosdb/virtualnetworkrule.py +78 -0
  366. cartography/models/azure/data_factory/__init__.py +0 -0
  367. cartography/models/azure/data_factory/data_factory.py +51 -0
  368. cartography/models/azure/data_factory/data_factory_dataset.py +94 -0
  369. cartography/models/azure/data_factory/data_factory_linked_service.py +78 -0
  370. cartography/models/azure/data_factory/data_factory_pipeline.py +93 -0
  371. cartography/models/azure/data_lake_filesystem.py +51 -0
  372. cartography/models/azure/event_grid_topic.py +57 -0
  373. cartography/models/azure/function_app.py +59 -0
  374. cartography/models/azure/load_balancer/__init__.py +0 -0
  375. cartography/models/azure/load_balancer/load_balancer.py +49 -0
  376. cartography/models/azure/load_balancer/load_balancer_backend_pool.py +73 -0
  377. cartography/models/azure/load_balancer/load_balancer_frontend_ip.py +75 -0
  378. cartography/models/azure/load_balancer/load_balancer_inbound_nat_rule.py +78 -0
  379. cartography/models/azure/load_balancer/load_balancer_rule.py +108 -0
  380. cartography/models/azure/logic_apps.py +56 -0
  381. cartography/models/azure/monitor.py +54 -0
  382. cartography/models/azure/network_interface.py +112 -0
  383. cartography/models/azure/network_security_group.py +50 -0
  384. cartography/models/azure/permission_relationships.py +60 -0
  385. cartography/models/azure/principal.py +41 -0
  386. cartography/models/azure/public_ip_address.py +50 -0
  387. cartography/models/azure/rbac.py +268 -0
  388. cartography/models/azure/resource_groups.py +52 -0
  389. cartography/models/azure/security_center.py +50 -0
  390. cartography/models/azure/sql/__init__.py +0 -0
  391. cartography/models/azure/sql/databasethreatdetectionpolicy.py +85 -0
  392. cartography/models/azure/sql/elasticpool.py +77 -0
  393. cartography/models/azure/sql/failovergroup.py +73 -0
  394. cartography/models/azure/sql/recoverabledatabase.py +75 -0
  395. cartography/models/azure/sql/replicationlink.py +81 -0
  396. cartography/models/azure/sql/restorabledroppeddatabase.py +82 -0
  397. cartography/models/azure/sql/restorepoint.py +74 -0
  398. cartography/models/azure/sql/serveradadministrator.py +74 -0
  399. cartography/models/azure/sql/serverdnsalias.py +71 -0
  400. cartography/models/azure/sql/sqldatabase.py +85 -0
  401. cartography/models/azure/sql/sqlserver.py +50 -0
  402. cartography/models/azure/sql/transparentdataencryption.py +76 -0
  403. cartography/models/azure/storage/__init__.py +0 -0
  404. cartography/models/azure/storage/account.py +59 -0
  405. cartography/models/azure/storage/blobcontainer.py +85 -0
  406. cartography/models/azure/storage/blobservice.py +71 -0
  407. cartography/models/azure/storage/fileservice.py +71 -0
  408. cartography/models/azure/storage/fileshare.py +82 -0
  409. cartography/models/azure/storage/queue.py +71 -0
  410. cartography/models/azure/storage/queueservice.py +73 -0
  411. cartography/models/azure/storage/table.py +72 -0
  412. cartography/models/azure/storage/tableservice.py +73 -0
  413. cartography/models/azure/subnet.py +101 -0
  414. cartography/models/azure/subscription.py +47 -0
  415. cartography/models/azure/tags/__init__.py +0 -0
  416. cartography/models/azure/tags/storage_tag.py +40 -0
  417. cartography/models/azure/tags/tag.py +37 -0
  418. cartography/models/azure/tenant.py +17 -0
  419. cartography/models/azure/virtual_network.py +49 -0
  420. cartography/models/azure/vm/__init__.py +0 -0
  421. cartography/models/azure/vm/datadisk.py +80 -0
  422. cartography/models/azure/vm/disk.py +55 -0
  423. cartography/models/azure/vm/snapshot.py +56 -0
  424. cartography/models/azure/vm/virtualmachine.py +59 -0
  425. cartography/models/bigfix/bigfix_computer.py +1 -1
  426. cartography/models/cloudflare/member.py +4 -0
  427. cartography/models/core/common.py +1 -0
  428. cartography/models/core/nodes.py +15 -2
  429. cartography/models/core/relationships.py +44 -0
  430. cartography/models/crowdstrike/hosts.py +1 -1
  431. cartography/models/digitalocean/droplet.py +2 -0
  432. cartography/models/duo/endpoint.py +1 -1
  433. cartography/models/duo/phone.py +2 -2
  434. cartography/models/duo/user.py +4 -0
  435. cartography/models/entra/app_role_assignment.py +115 -0
  436. cartography/models/entra/application.py +49 -0
  437. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  438. cartography/models/entra/group.py +117 -0
  439. cartography/models/entra/service_principal.py +104 -0
  440. cartography/models/entra/user.py +42 -51
  441. cartography/models/gcp/__init__.py +0 -0
  442. cartography/models/gcp/bigtable/__init__.py +0 -0
  443. cartography/models/gcp/bigtable/app_profile.py +94 -0
  444. cartography/models/gcp/bigtable/backup.py +91 -0
  445. cartography/models/gcp/bigtable/cluster.py +73 -0
  446. cartography/models/gcp/bigtable/instance.py +52 -0
  447. cartography/models/gcp/bigtable/table.py +69 -0
  448. cartography/models/gcp/compute/__init__.py +0 -0
  449. cartography/models/gcp/compute/subnet.py +74 -0
  450. cartography/models/gcp/compute/vpc.py +50 -0
  451. cartography/models/gcp/crm/__init__.py +0 -0
  452. cartography/models/gcp/crm/folders.py +98 -0
  453. cartography/models/gcp/crm/organizations.py +21 -0
  454. cartography/models/gcp/crm/projects.py +100 -0
  455. cartography/models/gcp/dns.py +109 -0
  456. cartography/models/gcp/gke.py +69 -0
  457. cartography/models/gcp/iam.py +3 -0
  458. cartography/models/gcp/permission_relationships.py +61 -0
  459. cartography/models/gcp/policy_bindings.py +93 -0
  460. cartography/models/gcp/storage/__init__.py +0 -0
  461. cartography/models/gcp/storage/bucket.py +119 -0
  462. cartography/models/github/commits.py +63 -0
  463. cartography/models/github/dependencies.py +73 -0
  464. cartography/models/github/manifests.py +49 -0
  465. cartography/models/github/users.py +10 -0
  466. cartography/models/googleworkspace/__init__.py +0 -0
  467. cartography/models/googleworkspace/device.py +132 -0
  468. cartography/models/googleworkspace/group.py +382 -0
  469. cartography/models/googleworkspace/oauth_app.py +124 -0
  470. cartography/models/googleworkspace/tenant.py +30 -0
  471. cartography/models/googleworkspace/user.py +113 -0
  472. cartography/models/gsuite/__init__.py +0 -0
  473. cartography/models/gsuite/group.py +218 -0
  474. cartography/models/gsuite/tenant.py +29 -0
  475. cartography/models/gsuite/user.py +107 -0
  476. cartography/models/kandji/device.py +1 -2
  477. cartography/models/keycloak/__init__.py +0 -0
  478. cartography/models/keycloak/authenticationexecution.py +160 -0
  479. cartography/models/keycloak/authenticationflow.py +54 -0
  480. cartography/models/keycloak/client.py +179 -0
  481. cartography/models/keycloak/group.py +101 -0
  482. cartography/models/keycloak/identityprovider.py +89 -0
  483. cartography/models/keycloak/organization.py +116 -0
  484. cartography/models/keycloak/organizationdomain.py +73 -0
  485. cartography/models/keycloak/realm.py +173 -0
  486. cartography/models/keycloak/role.py +126 -0
  487. cartography/models/keycloak/scope.py +73 -0
  488. cartography/models/keycloak/user.py +55 -0
  489. cartography/models/kubernetes/__init__.py +0 -0
  490. cartography/models/kubernetes/clusterrolebindings.py +138 -0
  491. cartography/models/kubernetes/clusterroles.py +52 -0
  492. cartography/models/kubernetes/clusters.py +26 -0
  493. cartography/models/kubernetes/containers.py +133 -0
  494. cartography/models/kubernetes/groups.py +107 -0
  495. cartography/models/kubernetes/namespaces.py +51 -0
  496. cartography/models/kubernetes/oidc.py +51 -0
  497. cartography/models/kubernetes/pods.py +80 -0
  498. cartography/models/kubernetes/rolebindings.py +159 -0
  499. cartography/models/kubernetes/roles.py +76 -0
  500. cartography/models/kubernetes/secrets.py +79 -0
  501. cartography/models/kubernetes/serviceaccounts.py +77 -0
  502. cartography/models/kubernetes/services.py +108 -0
  503. cartography/models/kubernetes/users.py +105 -0
  504. cartography/models/lastpass/user.py +4 -0
  505. cartography/models/ontology/__init__.py +0 -0
  506. cartography/models/ontology/device.py +137 -0
  507. cartography/models/ontology/mapping/__init__.py +76 -0
  508. cartography/models/ontology/mapping/data/__init__.py +0 -0
  509. cartography/models/ontology/mapping/data/apikeys.py +93 -0
  510. cartography/models/ontology/mapping/data/computeinstance.py +95 -0
  511. cartography/models/ontology/mapping/data/containers.py +88 -0
  512. cartography/models/ontology/mapping/data/databases.py +182 -0
  513. cartography/models/ontology/mapping/data/devices.py +194 -0
  514. cartography/models/ontology/mapping/data/thirdpartyapps.py +140 -0
  515. cartography/models/ontology/mapping/data/useraccounts.py +416 -0
  516. cartography/models/ontology/mapping/data/users.py +63 -0
  517. cartography/models/ontology/mapping/specs.py +85 -0
  518. cartography/models/ontology/user.py +51 -0
  519. cartography/models/openai/adminapikey.py +4 -0
  520. cartography/models/openai/apikey.py +4 -0
  521. cartography/models/openai/user.py +4 -0
  522. cartography/models/scaleway/__init__.py +0 -0
  523. cartography/models/scaleway/iam/__init__.py +0 -0
  524. cartography/models/scaleway/iam/apikey.py +100 -0
  525. cartography/models/scaleway/iam/application.py +52 -0
  526. cartography/models/scaleway/iam/group.py +95 -0
  527. cartography/models/scaleway/iam/user.py +64 -0
  528. cartography/models/scaleway/instance/__init__.py +0 -0
  529. cartography/models/scaleway/instance/flexibleip.py +52 -0
  530. cartography/models/scaleway/instance/instance.py +120 -0
  531. cartography/models/scaleway/organization.py +19 -0
  532. cartography/models/scaleway/project.py +48 -0
  533. cartography/models/scaleway/storage/__init__.py +0 -0
  534. cartography/models/scaleway/storage/snapshot.py +78 -0
  535. cartography/models/scaleway/storage/volume.py +51 -0
  536. cartography/models/sentinelone/__init__.py +1 -0
  537. cartography/models/sentinelone/account.py +40 -0
  538. cartography/models/sentinelone/agent.py +50 -0
  539. cartography/models/sentinelone/application.py +44 -0
  540. cartography/models/sentinelone/application_version.py +96 -0
  541. cartography/models/sentinelone/cve.py +73 -0
  542. cartography/models/slack/__init__.py +0 -0
  543. cartography/models/slack/channels.py +92 -0
  544. cartography/models/slack/group.py +129 -0
  545. cartography/models/slack/team.py +22 -0
  546. cartography/models/slack/user.py +62 -0
  547. cartography/models/snipeit/asset.py +2 -0
  548. cartography/models/snipeit/user.py +4 -0
  549. cartography/models/spacelift/__init__.py +0 -0
  550. cartography/models/spacelift/cloudtrailevent.py +120 -0
  551. cartography/models/spacelift/run.py +162 -0
  552. cartography/models/spacelift/space.py +131 -0
  553. cartography/models/spacelift/spaceliftaccount.py +31 -0
  554. cartography/models/spacelift/spaceliftgitcommit.py +157 -0
  555. cartography/models/spacelift/stack.py +96 -0
  556. cartography/models/spacelift/user.py +63 -0
  557. cartography/models/spacelift/worker.py +97 -0
  558. cartography/models/spacelift/workerpool.py +90 -0
  559. cartography/models/tailscale/device.py +2 -1
  560. cartography/models/tailscale/user.py +6 -1
  561. cartography/models/trivy/__init__.py +0 -0
  562. cartography/models/trivy/findings.py +66 -0
  563. cartography/models/trivy/fix.py +66 -0
  564. cartography/models/trivy/package.py +71 -0
  565. cartography/rules/README.md +1 -0
  566. cartography/rules/__init__.py +0 -0
  567. cartography/rules/cli.py +261 -0
  568. cartography/rules/data/__init__.py +0 -0
  569. cartography/rules/data/rules/__init__.py +46 -0
  570. cartography/rules/data/rules/cloud_security_product_deactivated.py +49 -0
  571. cartography/rules/data/rules/compute_instance_exposed.py +51 -0
  572. cartography/rules/data/rules/database_instance_exposed.py +53 -0
  573. cartography/rules/data/rules/delegation_boundary_modifiable.py +90 -0
  574. cartography/rules/data/rules/identity_administration_privileges.py +100 -0
  575. cartography/rules/data/rules/inactive_user_active_accounts.py +48 -0
  576. cartography/rules/data/rules/malicious_npm_dependencies_shai_hulud.py +2222 -0
  577. cartography/rules/data/rules/mfa_missing.py +46 -0
  578. cartography/rules/data/rules/object_storage_public.py +100 -0
  579. cartography/rules/data/rules/policy_administration_privileges.py +104 -0
  580. cartography/rules/data/rules/unmanaged_accounts.py +43 -0
  581. cartography/rules/data/rules/workload_identity_admin_capabilities.py +193 -0
  582. cartography/rules/formatters.py +108 -0
  583. cartography/rules/runners.py +216 -0
  584. cartography/rules/spec/__init__.py +0 -0
  585. cartography/rules/spec/model.py +267 -0
  586. cartography/rules/spec/result.py +38 -0
  587. cartography/sync.py +25 -5
  588. cartography/util.py +101 -31
  589. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/METADATA +61 -22
  590. cartography-0.123.0.dist-info/RECORD +856 -0
  591. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/entry_points.txt +1 -0
  592. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  593. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  594. cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
  595. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  596. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  597. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  598. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  599. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  600. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  601. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  602. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  603. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  604. cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
  605. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  606. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  607. cartography/data/jobs/cleanup/aws_import_vpc_peering_cleanup.json +0 -45
  608. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  609. cartography/data/jobs/cleanup/azure_cosmosdb_cassandra_keyspace_cleanup.json +0 -25
  610. cartography/data/jobs/cleanup/azure_cosmosdb_cors_details.json +0 -15
  611. cartography/data/jobs/cleanup/azure_cosmosdb_mongodb_database_cleanup.json +0 -25
  612. cartography/data/jobs/cleanup/azure_cosmosdb_sql_database_cleanup.json +0 -25
  613. cartography/data/jobs/cleanup/azure_cosmosdb_table_resources_cleanup.json +0 -15
  614. cartography/data/jobs/cleanup/azure_database_account_cleanup.json +0 -85
  615. cartography/data/jobs/cleanup/azure_import_disks_cleanup.json +0 -15
  616. cartography/data/jobs/cleanup/azure_import_snapshots_cleanup.json +0 -15
  617. cartography/data/jobs/cleanup/azure_import_virtual_machines_cleanup.json +0 -25
  618. cartography/data/jobs/cleanup/azure_sql_server_cleanup.json +0 -125
  619. cartography/data/jobs/cleanup/azure_storage_account_cleanup.json +0 -95
  620. cartography/data/jobs/cleanup/azure_subscriptions_cleanup.json +0 -14
  621. cartography/data/jobs/cleanup/azure_tenant_cleanup.json +0 -9
  622. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  623. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  624. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  625. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  626. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  627. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  628. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  629. cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
  630. cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
  631. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  632. cartography/intel/gcp/crm.py +0 -355
  633. cartography/intel/gsuite/api.py +0 -342
  634. cartography-0.104.0rc2.dist-info/RECORD +0 -455
  635. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  636. /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
  637. /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
  638. /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
  639. /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
  640. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/WHEEL +0 -0
  641. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/licenses/LICENSE +0 -0
  642. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,923 @@
1
+ """
2
+ ECR Image Layers module - fetches and syncs detailed container image layer information.
3
+
4
+ This is separate from the main ECR module to allow independent execution since layer
5
+ fetching can be significantly slower than basic ECR repository/image syncing.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ from typing import Any
12
+ from typing import Optional
13
+
14
+ import aioboto3
15
+ import httpx
16
+ import neo4j
17
+ from botocore.exceptions import ClientError
18
+ from types_aiobotocore_ecr import ECRClient
19
+
20
+ from cartography.client.core.tx import load
21
+ from cartography.graph.job import GraphJob
22
+ from cartography.models.aws.ecr.image import ECRImageSchema
23
+ from cartography.models.aws.ecr.image_layer import ECRImageLayerSchema
24
+ from cartography.util import timeit
25
+
26
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# diff_id that transform_ecr_image_layers compares against to set `is_empty` on a layer.
EMPTY_LAYER_DIFF_ID = "sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef"

# Batch size handed to load(). Kept small to hold per-transaction memory down,
# because every record fans out to many relationships.
ECR_LAYER_BATCH_SIZE = 200

# Manifest media types exchanged with ECR (Docker and OCI, index and single-image).
ECR_DOCKER_INDEX_MT = "application/vnd.docker.distribution.manifest.list.v2+json"
ECR_DOCKER_MANIFEST_MT = "application/vnd.docker.distribution.manifest.v2+json"
ECR_OCI_INDEX_MT = "application/vnd.oci.image.index.v1+json"
ECR_OCI_MANIFEST_MT = "application/vnd.oci.image.manifest.v1+json"

# Full accept list: the two index types first, then the two single-image manifest types.
ALL_ACCEPTED = [
    ECR_OCI_INDEX_MT,
    ECR_DOCKER_INDEX_MT,
    ECR_OCI_MANIFEST_MT,
    ECR_DOCKER_MANIFEST_MT,
]

# Media types that identify a manifest list / image index, plus a lower-cased
# copy for case-insensitive membership checks.
INDEX_MEDIA_TYPES = {ECR_DOCKER_INDEX_MT, ECR_OCI_INDEX_MT}
INDEX_MEDIA_TYPES_LOWER = set(map(str.lower, INDEX_MEDIA_TYPES))

# Substrings of a config media type that cause a manifest to be skipped.
SKIP_CONFIG_MEDIA_TYPE_FRAGMENTS = {"in-toto", "attestation", "buildkit"}
54
+
55
+
56
def extract_repo_uri_from_image_uri(image_uri: str) -> str:
    """
    Extract repository URI from image URI by removing tag or digest.

    A digest suffix (everything from "@" onwards) is removed regardless of the
    hash algorithm. Otherwise a tag is removed only when the last ":" sits in
    the final path component, so a registry port such as "host:5000/repo" is
    not mistaken for a tag.

    Examples:
        "repo@sha256:digest" -> "repo"
        "repo:tag" -> "repo"
        "repo" -> "repo"
        "host:5000/repo" -> "host:5000/repo"
        "host:5000/repo:tag" -> "host:5000/repo"
    """
    if "@" in image_uri:
        return image_uri.split("@", 1)[0]

    # A tag separator can only appear after the last "/"; a ":" before it
    # belongs to the registry host (port), not to a tag.
    last_component_start = image_uri.rfind("/") + 1
    tag_separator = image_uri.rfind(":")
    if tag_separator >= last_component_start:
        return image_uri[:tag_separator]
    return image_uri
71
+
72
+
73
def extract_platform_from_manifest(manifest_ref: dict) -> str:
    """
    Extract platform string from manifest reference.

    Reads "os", "architecture" and "variant" from the reference's "platform"
    entry and formats them with _format_platform. A missing or null "platform"
    is treated as empty, so the components fall back to _format_platform's
    defaults.

    :param manifest_ref: One entry of a manifest list's "manifests" array
    :return: Platform string built by _format_platform
    """
    # `or {}` also covers an explicit null platform, which .get("platform", {})
    # would hand back as None and then fail on the attribute lookups below.
    platform_info = manifest_ref.get("platform") or {}
    return _format_platform(
        platform_info.get("os"),
        platform_info.get("architecture"),
        platform_info.get("variant"),
    )
81
+
82
+
83
def _format_platform(
    os_name: Optional[str],
    architecture: Optional[str],
    variant: Optional[str] = None,
) -> str:
    """
    Build an "os/architecture[/variant]" platform string.

    A falsy os_name or architecture is replaced by "unknown"; the variant
    segment is appended only when variant is truthy.
    """
    segments = (os_name or "unknown", architecture or "unknown")
    if variant:
        segments += (variant,)
    return "/".join(segments)
92
+
93
+
94
async def batch_get_manifest(
    ecr_client: ECRClient, repo: str, image_ref: str, accepted_media_types: list[str]
) -> tuple[dict, str]:
    """
    Get image manifest using batch_get_image API.

    :param ecr_client: ECR client used to call batch_get_image
    :param repo: ECR repository name
    :param image_ref: Image digest (when it starts with "sha256:") or image tag
    :param accepted_media_types: Manifest media types passed as acceptedMediaTypes
    :return: Tuple of (parsed manifest document, manifest media type). Returns
        ({}, "") when the image is not found or the response lists no images.
    :raises ClientError: For any AWS error other than ImageNotFoundException
    """
    # A "sha256:" prefix marks a digest; anything else is looked up as a tag.
    if image_ref.startswith("sha256:"):
        image_ids = [{"imageDigest": image_ref}]
    else:
        image_ids = [{"imageTag": image_ref}]

    try:
        resp = await ecr_client.batch_get_image(
            repositoryName=repo,
            imageIds=image_ids,
            acceptedMediaTypes=accepted_media_types,
        )
    except ClientError as error:
        error_code = error.response.get("Error", {}).get("Code", "")
        if error_code == "ImageNotFoundException":
            logger.warning(
                "Image %s:%s not found while fetching manifest", repo, image_ref
            )
            return {}, ""
        # Fail loudly on throttling or unexpected AWS errors
        logger.error(
            "Failed to get manifest for %s:%s due to AWS error %s",
            repo,
            image_ref,
            error_code,
        )
        raise
    except Exception:
        logger.exception(
            "Unexpected error fetching manifest for %s:%s", repo, image_ref
        )
        raise

    images = resp.get("images")
    if not images:
        logger.warning("No image found for %s:%s", repo, image_ref)
        return {}, ""

    # Only the first returned image is used.
    image = images[0]
    manifest_json = json.loads(image["imageManifest"])
    media_type = image.get("imageManifestMediaType", "")
    return manifest_json, media_type
136
+
137
+
138
async def get_blob_json_via_presigned(
    ecr_client: ECRClient,
    repo: str,
    digest: str,
    http_client: httpx.AsyncClient,
) -> dict:
    """
    Fetch a blob through an ECR download URL and return it parsed as JSON.

    :param ecr_client: ECR client used to call get_download_url_for_layer
    :param repo: ECR repository name
    :param digest: Digest of the blob to download
    :param http_client: HTTP client used for the GET request (30 second timeout)
    :return: The response body decoded with response.json()
    :raises ClientError: When the download URL request fails (logged, then re-raised)
    :raises httpx.HTTPError: When the download fails or returns an error status
        (logged, then re-raised)
    """
    try:
        presigned = await ecr_client.get_download_url_for_layer(
            repositoryName=repo,
            layerDigest=digest,
        )
    except ClientError as aws_error:
        aws_code = aws_error.response.get("Error", {}).get("Code", "unknown")
        logger.error(
            "Failed to request download URL for layer %s in repo %s: %s",
            digest,
            repo,
            aws_code,
        )
        raise

    download_url = presigned["downloadUrl"]
    try:
        blob_response = await http_client.get(download_url, timeout=30.0)
        blob_response.raise_for_status()
    except httpx.HTTPError as http_error:
        logger.error(
            "HTTP error downloading blob %s for repo %s: %s",
            digest,
            repo,
            http_error,
        )
        raise

    return blob_response.json()
173
+
174
+
175
async def _extract_parent_image_from_attestation(
    ecr_client: ECRClient,
    repo_name: str,
    attestation_manifest_digest: str,
    http_client: httpx.AsyncClient,
) -> Optional[dict[str, str]]:
    """
    Look up the parent (base) image recorded in an in-toto attestation.

    Steps: fetch the attestation manifest, pick its first layer whose media
    type contains "in-toto", download that layer as JSON, then scan
    predicate.materials for the first container image reference that carries
    a sha256 digest.

    Any exception raised along the way is logged as a warning and turned into
    a None result.

    :param ecr_client: ECR client for fetching manifests and layers
    :param repo_name: ECR repository name
    :param attestation_manifest_digest: Digest of the attestation manifest
    :param http_client: HTTP client for downloading blobs
    :return: Dict with parent_image_uri and parent_image_digest, or None if no parent image found
    """
    container_prefixes = ("pkg:docker/", "pkg:oci/", "oci://")

    try:
        manifest, _media_type = await batch_get_manifest(
            ecr_client,
            repo_name,
            attestation_manifest_digest,
            [ECR_OCI_MANIFEST_MT, ECR_DOCKER_MANIFEST_MT],
        )
        if not manifest:
            logger.debug(
                "No attestation manifest found for digest %s in repo %s",
                attestation_manifest_digest,
                repo_name,
            )
            return None

        # First layer whose media type mentions in-toto, if any.
        intoto_layer = None
        for candidate in manifest.get("layers", []):
            if "in-toto" in candidate.get("mediaType", "").lower():
                intoto_layer = candidate
                break
        if not intoto_layer:
            logger.debug(
                "No in-toto layer found in attestation manifest %s",
                attestation_manifest_digest,
            )
            return None

        intoto_digest = intoto_layer.get("digest")
        if not intoto_digest:
            logger.debug("No digest found for in-toto layer")
            return None

        provenance = await get_blob_json_via_presigned(
            ecr_client,
            repo_name,
            intoto_digest,
            http_client,
        )
        if not provenance:
            logger.debug("Failed to download attestation blob")
            return None

        # Scan the provenance materials for a container image reference.
        for material in provenance.get("predicate", {}).get("materials", []):
            uri = material.get("uri", "")
            lowered = uri.lower()
            # Only container image references count, and never the Dockerfile itself.
            if not lowered.startswith(container_prefixes):
                continue
            if "dockerfile" in lowered:
                continue
            sha256_digest = material.get("digest", {}).get("sha256")
            if sha256_digest:
                return {
                    "parent_image_uri": uri,
                    "parent_image_digest": f"sha256:{sha256_digest}",
                }

        logger.debug(
            "No parent image found in attestation materials for %s",
            attestation_manifest_digest,
        )
        return None

    except Exception as e:
        logger.warning(
            "Error extracting parent image from attestation %s in repo %s: %s",
            attestation_manifest_digest,
            repo_name,
            e,
        )
        return None
278
+
279
+
280
async def _diff_ids_for_manifest(
    ecr_client: ECRClient,
    repo_name: str,
    manifest_doc: dict[str, Any],
    http_client: httpx.AsyncClient,
    platform_hint: Optional[str],
) -> dict[str, list[str]]:
    """
    Resolve a single-image manifest to its layer diff_ids.

    Downloads the manifest's config blob and reads the rootfs diff_ids from it.

    :param ecr_client: ECR client used to download the config blob
    :param repo_name: ECR repository name
    :param manifest_doc: Parsed image manifest
    :param http_client: HTTP client for downloading blobs
    :param platform_hint: Platform string to use as the key; when falsy the
        platform is derived from the config blob instead
    :return: {platform: diff_ids}, or {} when the manifest is skipped or has
        no config digest, config content or diff_ids
    """
    config_ref = manifest_doc.get("config", {})
    config_media_type = config_ref.get("mediaType", "").lower()

    # Manifests whose config media type contains a skip fragment are ignored.
    for fragment in SKIP_CONFIG_MEDIA_TYPE_FRAGMENTS:
        if fragment in config_media_type:
            return {}

    # A manifest made up solely of in-toto layers is ignored as well.
    manifest_layers = manifest_doc.get("layers", [])
    if manifest_layers and not any(
        "in-toto" not in entry.get("mediaType", "").lower()
        for entry in manifest_layers
    ):
        return {}

    config_digest = config_ref.get("digest")
    if not config_digest:
        return {}

    image_config = await get_blob_json_via_presigned(
        ecr_client,
        repo_name,
        config_digest,
        http_client,
    )
    if not image_config:
        return {}

    # Key casing varies, so both spellings are checked.
    rootfs = image_config.get("rootfs") or image_config.get("RootFS") or {}
    diff_ids = rootfs.get("diff_ids") or rootfs.get("DiffIDs") or []
    if not diff_ids:
        return {}

    # Prefer the caller's hint; otherwise build the platform from the config
    # blob, again tolerating either key casing.
    platform = (
        platform_hint
        if platform_hint
        else _format_platform(
            image_config.get("os") or image_config.get("OS"),
            image_config.get("architecture") or image_config.get("Architecture"),
            image_config.get("variant") or image_config.get("Variant"),
        )
    )
    return {platform: diff_ids}
333
+
334
+
335
def transform_ecr_image_layers(
    image_layers_data: dict[str, dict[str, list[str]]],
    image_digest_map: dict[str, str],
    image_attestation_map: Optional[dict[str, dict[str, str]]] = None,
    existing_properties_map: Optional[dict[str, dict[str, Any]]] = None,
) -> tuple[list[dict], list[dict]]:
    """
    Transform image layer data into format suitable for Neo4j ingestion.
    Creates linked list structure with NEXT relationships and HEAD/TAIL markers.

    Images whose existing "type" property is "manifest_list" are skipped. For
    every other image, each platform's diff_id chain contributes to the shared
    layer records, while the image's membership record uses the first
    non-empty platform chain only.

    :param image_layers_data: Map of image URI to platform to diff_ids
    :param image_digest_map: Map of image URI to image digest
    :param image_attestation_map: Map of image URI to attestation data (parent_image_uri, parent_image_digest)
    :param existing_properties_map: Map of image digest to existing ECRImage properties (type, architecture, etc.)
    :return: Tuple of (layers, memberships). Each layer dict has "diff_id" and
        "is_empty", plus "next_diff_ids", "head_image_ids" and "tail_image_ids"
        when non-empty (each sorted for reproducible output). Each membership
        dict has "imageDigest", "layer_diff_ids", the image's existing
        properties and, when available, the attestation fields.
    :raises KeyError: If an image URI in image_layers_data is missing from image_digest_map
    """
    if image_attestation_map is None:
        image_attestation_map = {}
    if existing_properties_map is None:
        existing_properties_map = {}
    layers_by_diff_id: dict[str, dict[str, Any]] = {}
    memberships_by_digest: dict[str, dict[str, Any]] = {}

    for image_uri, platforms in image_layers_data.items():
        # fetch_image_layers_async guarantees every uri in image_layers_data has a digest
        image_digest = image_digest_map[image_uri]
        has_existing_properties = image_digest in existing_properties_map

        # Skip creating layer relationships for manifest lists
        if (
            has_existing_properties
            and existing_properties_map[image_digest].get("type") == "manifest_list"
        ):
            continue

        ordered_layers_for_image: Optional[list[str]] = None

        for diff_ids in platforms.values():
            if not diff_ids:
                continue

            # The first non-empty platform chain becomes the image's ordered layer list.
            if ordered_layers_for_image is None:
                ordered_layers_for_image = list(diff_ids)

            last_index = len(diff_ids) - 1
            for i, diff_id in enumerate(diff_ids):
                # Get or create layer
                if diff_id not in layers_by_diff_id:
                    layers_by_diff_id[diff_id] = {
                        "diff_id": diff_id,
                        "is_empty": diff_id == EMPTY_LAYER_DIFF_ID,
                        "next_diff_ids": set(),
                        "head_image_ids": set(),
                        "tail_image_ids": set(),
                    }
                layer = layers_by_diff_id[diff_id]

                # Add NEXT relationship if not the last layer
                if i < last_index:
                    layer["next_diff_ids"].add(diff_ids[i + 1])

                # Track which images this layer is HEAD or TAIL of
                if i == 0:
                    layer["head_image_ids"].add(image_digest)
                if i == last_index:
                    layer["tail_image_ids"].add(image_digest)

        if ordered_layers_for_image:
            membership: dict[str, Any] = {
                "layer_diff_ids": ordered_layers_for_image,
            }

            # Preserve existing ECRImage properties (type, architecture, os, variant, etc.)
            if has_existing_properties:
                membership.update(existing_properties_map[image_digest])

            # Add attestation data if available for this image
            if image_uri in image_attestation_map:
                attestation = image_attestation_map[image_uri]
                membership["parent_image_uri"] = attestation["parent_image_uri"]
                membership["parent_image_digest"] = attestation["parent_image_digest"]
                membership["from_attestation"] = True
                membership["confidence"] = "explicit"

            memberships_by_digest[image_digest] = membership

    # Convert sets back to lists for Neo4j ingestion. Sorting keeps the payload
    # identical between runs; plain set iteration order for strings is not.
    layers = []
    for layer in layers_by_diff_id.values():
        layer_dict: dict[str, Any] = {
            "diff_id": layer["diff_id"],
            "is_empty": layer["is_empty"],
        }
        for field in ("next_diff_ids", "head_image_ids", "tail_image_ids"):
            if layer[field]:
                layer_dict[field] = sorted(layer[field], key=str)
        layers.append(layer_dict)

    # Reconstruct memberships list with imageDigest field
    memberships = [
        {"imageDigest": digest, **membership_data}
        for digest, membership_data in memberships_by_digest.items()
    ]

    return layers, memberships
446
+
447
+
448
@timeit
def load_ecr_image_layers(
    neo4j_session: neo4j.Session,
    image_layers: list[dict],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Load image layers into Neo4j.

    Uses a conservative batch size (ECR_LAYER_BATCH_SIZE) to avoid Neo4j
    transaction memory limits, since layer objects can contain large arrays of
    relationships.

    :param neo4j_session: Neo4j session passed to load()
    :param image_layers: Layer dicts to ingest with ECRImageLayerSchema
    :param region: AWS region; used only in the log message
    :param current_aws_account_id: Passed to load() as AWS_ID
    :param aws_update_tag: Passed to load() as lastupdated
    """
    logger.info(
        "Loading %s image layers for region %s into graph.",
        len(image_layers),
        region,
    )

    load(
        neo4j_session,
        ECRImageLayerSchema(),
        image_layers,
        batch_size=ECR_LAYER_BATCH_SIZE,
        lastupdated=aws_update_tag,
        AWS_ID=current_aws_account_id,
    )
475
+
476
+
477
@timeit
def load_ecr_image_layer_memberships(
    neo4j_session: neo4j.Session,
    memberships: list[dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Load image layer memberships into Neo4j.

    Uses a conservative batch size (ECR_LAYER_BATCH_SIZE, the same constant
    used for layer loading) to avoid Neo4j transaction memory limits, since
    membership objects can contain large arrays of layer diff_ids.

    :param neo4j_session: Neo4j session passed to load()
    :param memberships: Membership dicts to ingest with ECRImageSchema
    :param region: Passed to load() as Region
    :param current_aws_account_id: Passed to load() as AWS_ID
    :param aws_update_tag: Passed to load() as lastupdated
    """
    load(
        neo4j_session,
        ECRImageSchema(),
        memberships,
        batch_size=ECR_LAYER_BATCH_SIZE,
        lastupdated=aws_update_tag,
        Region=region,
        AWS_ID=current_aws_account_id,
    )
501
+
502
+
503
async def fetch_image_layers_async(
    ecr_client: ECRClient,
    repo_images_list: list[dict],
    max_concurrent: int = 200,
) -> tuple[dict[str, dict[str, list[str]]], dict[str, str], dict[str, dict[str, str]]]:
    """
    Fetch image layers for ECR images in parallel with caching and non-blocking I/O.

    One task is created per entry of repo_images_list; at most max_concurrent
    of them run their body at a time. Manifests are cached per
    (repository name, digest) and concurrent requests for the same key share
    a single fetch.

    A failure while processing one child manifest of a manifest list is logged
    and that child is skipped. A failure of a top-level image task propagates
    to the caller; the tasks still outstanding are cancelled first.

    :param ecr_client: ECR client used for manifest and blob requests
    :param repo_images_list: Dicts with "uri", "imageDigest" and "repo_uri" keys
    :param max_concurrent: Maximum number of images processed concurrently

    Returns:
        - image_layers_data: Map of image URI to platform to diff_ids
        - image_digest_map: Map of image URI to image digest
        - image_attestation_map: Map of image URI to attestation data (parent_image_uri, parent_image_digest)

    :raises ValueError: If a repo_uri has no "/" separator or an image result has an empty digest
    """
    image_layers_data: dict[str, dict[str, list[str]]] = {}
    image_digest_map: dict[str, str] = {}
    image_attestation_map: dict[str, dict[str, str]] = {}
    semaphore = asyncio.Semaphore(max_concurrent)

    # Cache for manifest fetches keyed by (repo_name, imageDigest)
    manifest_cache: dict[tuple[str, str], tuple[dict, str]] = {}
    # Lock guarding writes to the cache
    cache_lock = asyncio.Lock()
    # In-flight requests to coalesce duplicate fetches
    inflight: dict[tuple[str, str], asyncio.Task] = {}

    async def _fetch_and_cache_manifest(
        repo_name: str, digest_or_tag: str, accepted: list[str]
    ) -> tuple[dict, str]:
        """
        Fetch a manifest once per (repo_name, digest_or_tag).

        Completed fetches are served from manifest_cache; a fetch that is
        still running is shared through the inflight map.
        """
        key = (repo_name, digest_or_tag)

        # Fast path: check cache without lock
        if key in manifest_cache:
            return manifest_cache[key]

        # Check for existing in-flight request
        task = inflight.get(key)
        if task is None:
            # Create new task for this manifest
            async def _do() -> tuple[dict, str]:
                # Fetch without holding the lock
                doc, mt = await batch_get_manifest(
                    ecr_client, repo_name, digest_or_tag, accepted
                )
                # Store result under lock; setdefault keeps an earlier entry if one exists
                async with cache_lock:
                    return manifest_cache.setdefault(key, (doc, mt))

            task = asyncio.create_task(_do())
            inflight[key] = task

        try:
            return await task
        finally:
            # Clean up inflight entry
            inflight.pop(key, None)

    async def fetch_single_image_layers(
        repo_image: dict,
        http_client: httpx.AsyncClient,
    ) -> Optional[
        tuple[str, str, dict[str, list[str]], Optional[dict[str, dict[str, str]]]]
    ]:
        """
        Fetch layers for a single image and extract attestation if present.

        Returns tuple of (uri, digest, platform_layers, attestations_by_child_digest) where
        attestations_by_child_digest maps child image digest to parent image info.
        Returns None when no manifest, layers or attestation data were found.
        """
        async with semaphore:
            # Caller guarantees these fields exist in every repo_image
            uri = repo_image["uri"]
            digest = repo_image["imageDigest"]
            repo_uri = repo_image["repo_uri"]

            # Extract repository name: everything after the first "/"
            parts = repo_uri.split("/", 1)
            if len(parts) != 2:
                raise ValueError(f"Unexpected ECR repository URI format: {repo_uri}")
            repo_name = parts[1]

            # Get manifest using the shared cache
            doc, media_type = await _fetch_and_cache_manifest(
                repo_name, digest, ALL_ACCEPTED
            )

            if not doc:
                return None

            manifest_media_type = (media_type or doc.get("mediaType", "")).lower()
            platform_layers: dict[str, list[str]] = {}
            attestation_data: Optional[dict[str, dict[str, str]]] = None

            if doc.get("manifests") and manifest_media_type in INDEX_MEDIA_TYPES_LOWER:

                async def _process_child_manifest(
                    manifest_ref: dict,
                ) -> tuple[dict[str, list[str]], Optional[tuple[str, dict[str, str]]]]:
                    # Check if this is an attestation manifest
                    if (
                        manifest_ref.get("annotations", {}).get(
                            "vnd.docker.reference.type"
                        )
                        == "attestation-manifest"
                    ):
                        # Extract which child image this attestation is for
                        attests_child_digest = manifest_ref.get("annotations", {}).get(
                            "vnd.docker.reference.digest"
                        )
                        if not attests_child_digest:
                            return {}, None

                        # Extract base image from attestation
                        attestation_digest = manifest_ref.get("digest")
                        if attestation_digest:
                            attestation_info = (
                                await _extract_parent_image_from_attestation(
                                    ecr_client,
                                    repo_name,
                                    attestation_digest,
                                    http_client,
                                )
                            )
                            if attestation_info:
                                # Return (attests_child_digest, parent_info) tuple
                                return {}, (attests_child_digest, attestation_info)
                        return {}, None

                    child_digest = manifest_ref.get("digest")
                    if not child_digest:
                        return {}, None

                    # Use the shared cache for the child manifest
                    child_doc, _ = await _fetch_and_cache_manifest(
                        repo_name,
                        child_digest,
                        [ECR_OCI_MANIFEST_MT, ECR_DOCKER_MANIFEST_MT],
                    )
                    if not child_doc:
                        return {}, None

                    platform_hint = extract_platform_from_manifest(manifest_ref)
                    diff_map = await _diff_ids_for_manifest(
                        ecr_client,
                        repo_name,
                        child_doc,
                        http_client,
                        platform_hint,
                    )
                    return diff_map, None

                # Process all child manifests in parallel
                child_tasks = [
                    _process_child_manifest(manifest_ref)
                    for manifest_ref in doc.get("manifests", [])
                ]
                child_results = await asyncio.gather(
                    *child_tasks, return_exceptions=True
                )

                # Merge results from successful child manifest processing
                # Track attestation data by child digest for proper mapping
                attestations_by_child_digest: dict[str, dict[str, str]] = {}

                for result in child_results:
                    if isinstance(result, BaseException):
                        # return_exceptions=True hands failures back as values;
                        # record them instead of dropping them silently.
                        logger.warning(
                            "Skipping a child manifest of %s in repo %s: %r",
                            digest,
                            repo_name,
                            result,
                        )
                        continue
                    if isinstance(result, tuple) and len(result) == 2:
                        layer_data, attest_data = result
                        if layer_data:
                            platform_layers.update(layer_data)
                        if attest_data:
                            # attest_data is (child_digest, parent_info) tuple
                            child_digest, parent_info = attest_data
                            attestations_by_child_digest[child_digest] = parent_info

                # Build attestation_data with child digest mapping
                if attestations_by_child_digest:
                    attestation_data = attestations_by_child_digest
            else:
                diff_map = await _diff_ids_for_manifest(
                    ecr_client,
                    repo_name,
                    doc,
                    http_client,
                    None,
                )
                platform_layers.update(diff_map)

            # Return if we found layers or attestation data
            # Manifest lists may have attestation_data without platform_layers
            if platform_layers or attestation_data:
                return uri, digest, platform_layers, attestation_data

            return None

    async with httpx.AsyncClient() as http_client:
        # Create tasks for all images
        tasks = [
            asyncio.create_task(
                fetch_single_image_layers(repo_image, http_client),
            )
            for repo_image in repo_images_list
        ]

        # Process with progress logging
        total = len(tasks)
        logger.info(
            "Fetching layers for %s images with %s concurrent connections...",
            total,
            max_concurrent,
        )

        if not tasks:
            return image_layers_data, image_digest_map, image_attestation_map

        progress_interval = max(1, min(100, total // 10 or 1))
        completed = 0

        try:
            for task in asyncio.as_completed(tasks):
                result = await task
                completed += 1

                if completed % progress_interval == 0 or completed == total:
                    percent = (completed / total) * 100
                    logger.info(
                        "Fetched layer metadata for %d/%d images (%.1f%%)",
                        completed,
                        total,
                        percent,
                    )

                if result:
                    uri, digest, layer_data, attestations_by_child_digest = result
                    if not digest:
                        raise ValueError(f"Empty digest returned for image {uri}")
                    image_layers_data[uri] = layer_data
                    image_digest_map[uri] = digest
                    if attestations_by_child_digest:
                        # Map attestation data by child digest URIs
                        repo_uri = extract_repo_uri_from_image_uri(uri)
                        for (
                            child_digest,
                            parent_info,
                        ) in attestations_by_child_digest.items():
                            child_uri = f"{repo_uri}@{child_digest}"
                            image_attestation_map[child_uri] = parent_info
                            # Also add to digest map so transform can look up the child digest
                            image_digest_map[child_uri] = child_digest
        finally:
            # On the success path every task is already done and this is a no-op.
            # On failure, stop the remaining tasks and wait for them to unwind
            # before the shared HTTP client is closed.
            unfinished = [pending for pending in tasks if not pending.done()]
            if unfinished:
                for pending in unfinished:
                    pending.cancel()
                await asyncio.gather(*unfinished, return_exceptions=True)

    logger.info(
        "Successfully fetched layers for %s/%s images",
        len(image_layers_data),
        len(repo_images_list),
    )
    if image_attestation_map:
        logger.info(
            "Found attestations with base image info for %s images",
            len(image_attestation_map),
        )
    return image_layers_data, image_digest_map, image_attestation_map
759
+
760
+
761
def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
    """
    Build a GraphJob from ECRImageLayerSchema and run it on the given session.

    :param neo4j_session: Neo4j session the job runs against
    :param common_job_parameters: Parameters handed to GraphJob.from_node_schema
    """
    logger.debug("Running image layer cleanup job.")
    layer_cleanup_job = GraphJob.from_node_schema(
        ECRImageLayerSchema(),
        common_job_parameters,
    )
    layer_cleanup_job.run(neo4j_session)
766
+
767
+
768
def _partition_images_for_layer_fetch(
    ecr_images: list[dict],
) -> tuple[list[dict[str, str]], dict[str, dict]]:
    """
    Split graph rows into a manifest fetch list and a per-digest property map.

    Each digest is handled once; when the same digest appears in several rows,
    only the first row is used.

    :param ecr_images: Rows returned by the ECR image query in ``sync``. Each row
        must contain ``digest`` and ``repo_uri``; the remaining keys are optional.
    :return: ``(repo_images_list, existing_properties)`` where ``repo_images_list``
        holds one ``{"imageDigest", "uri", "repo_uri"}`` entry per non-attestation
        digest, and ``existing_properties`` maps every digest to the image
        properties read from the graph.
    """
    preserved_keys = (
        "type",
        "architecture",
        "os",
        "variant",
        "attestation_type",
        "attests_digest",
        "media_type",
        "artifact_media_type",
        "child_image_digests",
    )
    repo_images_list: list[dict[str, str]] = []
    existing_properties: dict[str, dict] = {}

    for img_data in ecr_images:
        digest = img_data["digest"]
        # Every processed digest is recorded in existing_properties, so the map
        # doubles as the "already seen" set.
        if digest in existing_properties:
            continue

        # Store existing properties for ALL images to preserve during updates
        existing_properties[digest] = {
            key: img_data.get(key) for key in preserved_keys
        }

        # Fetch manifests for:
        # - Platform-specific images (type="image") - to get their layers
        # - Manifest lists (type="manifest_list") - to extract attestation parent image data
        # Skip only attestations since they don't have useful layer or parent data
        if img_data.get("type") != "attestation":
            repo_uri = img_data["repo_uri"]
            repo_images_list.append(
                {
                    "imageDigest": digest,
                    "uri": f"{repo_uri}@{digest}",
                    "repo_uri": repo_uri,
                }
            )

    return repo_images_list, existing_properties


@timeit
def sync(
    neo4j_session: neo4j.Session,
    aioboto3_session: aioboto3.Session,
    regions: list[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: dict,
) -> None:
    """
    Sync ECR image layers. This fetches detailed layer information for ECR images
    that already exist in the graph.

    Prerequisites: Basic ECR data (repositories and images) must already be loaded
    via the 'ecr' module before running this.

    Layer fetching can be slow for accounts with many container images.

    :param neo4j_session: Session used to read existing ECR images and to load
        layers and layer memberships.
    :param aioboto3_session: Session used to create the async ECR client per region.
    :param regions: Regions to sync; each one is processed in turn.
    :param current_aws_account_id: Account ID used to scope the image query and
        passed to the load functions.
    :param update_tag: Update tag passed to the load functions.
    :param common_job_parameters: Parameters passed to the cleanup job, which runs
        once after all regions have been processed.
    """
    # Kept as a function-scope import (as in the original), but resolved once per
    # call instead of once per region.
    from cartography.client.core.tx import read_list_of_dicts_tx

    # Query for ECR images with all their existing properties to preserve during layer sync
    query = """
    MATCH (img:ECRImage)<-[:IMAGE]-(repo_img:ECRRepositoryImage)<-[:REPO_IMAGE]-(repo:ECRRepository)
    MATCH (repo)<-[:RESOURCE]-(:AWSAccount {id: $AWS_ID})
    WHERE repo.region = $Region
    RETURN DISTINCT
        img.digest AS digest,
        repo_img.id AS uri,
        repo.uri AS repo_uri,
        img.type AS type,
        img.architecture AS architecture,
        img.os AS os,
        img.variant AS variant,
        img.attestation_type AS attestation_type,
        img.attests_digest AS attests_digest,
        img.media_type AS media_type,
        img.artifact_media_type AS artifact_media_type,
        img.child_image_digests AS child_image_digests
    """

    for region in regions:
        logger.info(
            "Syncing ECR image layers for region '%s' in account '%s'.",
            region,
            current_aws_account_id,
        )

        ecr_images = neo4j_session.read_transaction(
            read_list_of_dicts_tx,
            query,
            AWS_ID=current_aws_account_id,
            Region=region,
        )

        # Build repo_images_list and existing_properties map
        repo_images_list, existing_properties = _partition_images_for_layer_fetch(
            ecr_images
        )

        logger.info(
            "Found %s distinct ECR image digests in graph for region %s",
            len(repo_images_list),
            region,
        )

        if not repo_images_list:
            logger.warning(
                "No ECR images found in graph for region %s. "
                "Run 'ecr' sync first to populate basic ECR data.",
                region,
            )
            continue

        # Fetch and load image layers using async ECR client.
        # repo_images_list is guaranteed non-empty past the guard above.
        logger.info(
            "Starting to fetch layers for %s images...",
            len(repo_images_list),
        )

        async def _fetch_with_async_client() -> tuple[
            dict[str, dict[str, list[str]]],
            dict[str, str],
            dict[str, dict[str, str]],
        ]:
            # Invoked immediately below, so it always sees this iteration's
            # region and repo_images_list.
            async with aioboto3_session.client(
                "ecr", region_name=region
            ) as ecr_client:
                return await fetch_image_layers_async(ecr_client, repo_images_list)

        # Use get_event_loop() + run_until_complete() to avoid tearing down loop
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No event loop in current thread, create one
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        image_layers_data, image_digest_map, image_attestation_map = (
            loop.run_until_complete(_fetch_with_async_client())
        )

        logger.info(
            "Successfully fetched layers for %s images",
            len(image_layers_data),
        )
        layers, memberships = transform_ecr_image_layers(
            image_layers_data,
            image_digest_map,
            image_attestation_map,
            existing_properties,
        )
        load_ecr_image_layers(
            neo4j_session,
            layers,
            region,
            current_aws_account_id,
            update_tag,
        )
        load_ecr_image_layer_memberships(
            neo4j_session,
            memberships,
            region,
            current_aws_account_id,
            update_tag,
        )

    cleanup(neo4j_session, common_job_parameters)