cartography 0.104.0rc3__py3-none-any.whl → 0.106.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (75) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +26 -1
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/config.py +8 -0
  6. cartography/data/indexes.cypher +0 -37
  7. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
  8. cartography/graph/cleanupbuilder.py +151 -41
  9. cartography/intel/aws/acm.py +124 -0
  10. cartography/intel/aws/cloudtrail.py +3 -38
  11. cartography/intel/aws/ecr.py +8 -2
  12. cartography/intel/aws/ecs.py +228 -380
  13. cartography/intel/aws/efs.py +99 -11
  14. cartography/intel/aws/iam.py +1 -1
  15. cartography/intel/aws/identitycenter.py +14 -3
  16. cartography/intel/aws/inspector.py +106 -53
  17. cartography/intel/aws/lambda_function.py +1 -1
  18. cartography/intel/aws/rds.py +2 -1
  19. cartography/intel/aws/resources.py +2 -0
  20. cartography/intel/aws/s3.py +195 -4
  21. cartography/intel/aws/sqs.py +36 -90
  22. cartography/intel/entra/__init__.py +22 -0
  23. cartography/intel/entra/applications.py +366 -0
  24. cartography/intel/entra/groups.py +151 -0
  25. cartography/intel/entra/ou.py +21 -5
  26. cartography/intel/kubernetes/__init__.py +30 -14
  27. cartography/intel/kubernetes/clusters.py +86 -0
  28. cartography/intel/kubernetes/namespaces.py +59 -57
  29. cartography/intel/kubernetes/pods.py +140 -77
  30. cartography/intel/kubernetes/secrets.py +95 -45
  31. cartography/intel/kubernetes/services.py +131 -67
  32. cartography/intel/kubernetes/util.py +125 -14
  33. cartography/intel/trivy/__init__.py +161 -0
  34. cartography/intel/trivy/scanner.py +363 -0
  35. cartography/models/aws/acm/__init__.py +0 -0
  36. cartography/models/aws/acm/certificate.py +75 -0
  37. cartography/models/aws/cloudtrail/trail.py +24 -0
  38. cartography/models/aws/ecs/__init__.py +0 -0
  39. cartography/models/aws/ecs/clusters.py +64 -0
  40. cartography/models/aws/ecs/container_definitions.py +93 -0
  41. cartography/models/aws/ecs/container_instances.py +84 -0
  42. cartography/models/aws/ecs/containers.py +80 -0
  43. cartography/models/aws/ecs/services.py +117 -0
  44. cartography/models/aws/ecs/task_definitions.py +97 -0
  45. cartography/models/aws/ecs/tasks.py +110 -0
  46. cartography/models/aws/efs/file_system.py +60 -0
  47. cartography/models/aws/efs/mount_target.py +29 -2
  48. cartography/models/aws/s3/notification.py +24 -0
  49. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  50. cartography/models/aws/sqs/__init__.py +0 -0
  51. cartography/models/aws/sqs/queue.py +89 -0
  52. cartography/models/core/nodes.py +15 -2
  53. cartography/models/entra/app_role_assignment.py +115 -0
  54. cartography/models/entra/application.py +47 -0
  55. cartography/models/entra/group.py +91 -0
  56. cartography/models/kubernetes/__init__.py +0 -0
  57. cartography/models/kubernetes/clusters.py +26 -0
  58. cartography/models/kubernetes/containers.py +108 -0
  59. cartography/models/kubernetes/namespaces.py +51 -0
  60. cartography/models/kubernetes/pods.py +80 -0
  61. cartography/models/kubernetes/secrets.py +79 -0
  62. cartography/models/kubernetes/services.py +108 -0
  63. cartography/models/trivy/__init__.py +0 -0
  64. cartography/models/trivy/findings.py +66 -0
  65. cartography/models/trivy/fix.py +66 -0
  66. cartography/models/trivy/package.py +71 -0
  67. cartography/sync.py +2 -0
  68. cartography/util.py +15 -10
  69. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/METADATA +3 -2
  70. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/RECORD +74 -40
  71. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  72. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/WHEEL +0 -0
  73. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/entry_points.txt +0 -0
  74. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/licenses/LICENSE +0 -0
  75. {cartography-0.104.0rc3.dist-info → cartography-0.106.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,90 +1,154 @@
1
+ import json
1
2
  import logging
2
- from typing import Dict
3
- from typing import List
3
+ from typing import Any
4
4
 
5
- from neo4j import Session
5
+ import neo4j
6
+ from kubernetes.client.models import V1LoadBalancerIngress
7
+ from kubernetes.client.models import V1PortStatus
8
+ from kubernetes.client.models import V1Service
6
9
 
10
+ from cartography.client.core.tx import load
11
+ from cartography.graph.job import GraphJob
7
12
  from cartography.intel.kubernetes.util import get_epoch
13
+ from cartography.intel.kubernetes.util import k8s_paginate
8
14
  from cartography.intel.kubernetes.util import K8sClient
15
+ from cartography.models.kubernetes.services import KubernetesServiceSchema
9
16
  from cartography.util import timeit
10
17
 
11
18
  logger = logging.getLogger(__name__)
12
19
 
13
20
 
14
21
  @timeit
15
- def sync_services(
16
- session: Session,
17
- client: K8sClient,
18
- update_tag: int,
19
- cluster: Dict,
20
- pods: List[Dict],
21
- ) -> None:
22
- services = get_services(client, cluster, pods)
23
- load_services(session, services, update_tag)
22
+ def get_services(client: K8sClient) -> list[V1Service]:
23
+ items = k8s_paginate(client.core.list_service_for_all_namespaces)
24
+ return items
24
25
 
25
26
 
26
- @timeit
27
- def get_services(client: K8sClient, cluster: Dict, pods: List[Dict]) -> List[Dict]:
28
- services = list()
29
- for service in client.core.list_service_for_all_namespaces().items:
27
+ def _format_service_selector(selector: dict[str, str]) -> str:
28
+ return json.dumps(selector)
29
+
30
+
31
+ def _format_load_balancer_ingress(ingress: list[V1LoadBalancerIngress] | None) -> str:
32
+
33
+ def _format_ingress_ports(
34
+ ports: list[V1PortStatus] | None,
35
+ ) -> list[dict[str, Any]] | None:
36
+ if ports is None:
37
+ return None
38
+
39
+ ingress_ports = []
40
+ for port in ports:
41
+ ingress_ports.append(
42
+ {
43
+ "error": port.error,
44
+ "port": port.port,
45
+ "protocol": port.protocol,
46
+ }
47
+ )
48
+ return ingress_ports
49
+
50
+ if ingress is None:
51
+ return json.dumps(None)
52
+
53
+ loadbalancer_ingress = []
54
+ for item in ingress:
55
+ loadbalancer_ingress.append(
56
+ {
57
+ "hostname": item.hostname,
58
+ "ip": item.ip,
59
+ "ip_mode": item.ip_mode,
60
+ "ports": _format_ingress_ports(item.ports),
61
+ }
62
+ )
63
+ return json.dumps(loadbalancer_ingress)
64
+
65
+
66
+ def transform_services(
67
+ services: list[V1Service], all_pods: list[dict[str, Any]]
68
+ ) -> list[dict[str, Any]]:
69
+ services_list = []
70
+ for service in services:
30
71
  item = {
31
72
  "uid": service.metadata.uid,
32
73
  "name": service.metadata.name,
33
74
  "creation_timestamp": get_epoch(service.metadata.creation_timestamp),
34
75
  "deletion_timestamp": get_epoch(service.metadata.deletion_timestamp),
35
76
  "namespace": service.metadata.namespace,
36
- "cluster_uid": cluster["uid"],
37
77
  "type": service.spec.type,
38
- "selector": service.spec.selector,
78
+ "selector": _format_service_selector(service.spec.selector),
79
+ "cluster_ip": service.spec.cluster_ip,
39
80
  "load_balancer_ip": service.spec.load_balancer_ip,
40
81
  }
41
82
 
42
- ingresses = service.status.load_balancer.ingress
43
- for ingress in ingresses or list():
44
- item.update({"ingress_host": ingress.hostname, "ingress_ip": ingress.ip})
45
-
46
- service_pods = list()
47
- for pod in pods:
48
- is_service_pod = True if service.spec.selector else False
49
- for selector in service.spec.selector or dict():
50
- if (
51
- not pod.get("labels")
52
- or selector not in pod["labels"]
53
- or service.spec.selector[selector] != pod["labels"][selector]
54
- ):
55
- is_service_pod = False
56
- break
57
- if is_service_pod:
58
- service_pods.append(pod)
59
- item["pods"] = service_pods
60
- services.append(item)
61
- return services
62
-
63
-
64
- def load_services(session: Session, data: List[Dict], update_tag: int) -> None:
65
- ingestion_cypher_query = """
66
- UNWIND $services as k8service
67
- MERGE (service:KubernetesService {id: k8service.uid})
68
- ON CREATE SET service.firstseen = timestamp()
69
- SET service.lastupdated = $update_tag,
70
- service.name = k8service.name,
71
- service.created_at = k8service.creation_timestamp,
72
- service.deleted_at = k8service.deletion_timestamp,
73
- service.type = k8service.type,
74
- service.load_balancer_ip = k8service.load_balancer_ip,
75
- service.ingress_host = k8service.ingress_host,
76
- service.ingress_ip = k8service.ingress_ip
77
- WITH service, k8service.namespace as ns, k8service.cluster_uid as cuid, k8service.pods as k8pods
78
- MATCH (cluster:KubernetesCluster {id: cuid})-[:HAS_NAMESPACE]->(space:KubernetesNamespace {name: ns})
79
- MERGE (space)-[rel1:HAS_SERVICE]->(service)
80
- ON CREATE SET rel1.firstseen = timestamp()
81
- SET rel1.lastupdated = $update_tag
82
- WITH service, k8pods
83
- UNWIND k8pods as k8pod
84
- MATCH (pod:KubernetesPod {id: k8pod.uid})
85
- MERGE (service)-[rel2:SERVES_POD]->(pod)
86
- ON CREATE SET rel2.firstseen = timestamp()
87
- SET rel2.lastupdated = $update_tag
88
- """
89
- logger.info(f"Loading {len(data)} kubernetes services.")
90
- session.run(ingestion_cypher_query, services=data, update_tag=update_tag)
83
+ # TODO: instead of storing a json string, we should probably create separate nodes for each ingress
84
+ if service.spec.type == "LoadBalancer":
85
+ if service.status.load_balancer:
86
+ item["load_balancer_ingress"] = _format_load_balancer_ingress(
87
+ service.status.load_balancer.ingress
88
+ )
89
+
90
+ # check if pod labels match service selector and add pod_ids to item
91
+ pod_ids = []
92
+ for pod in all_pods:
93
+ if pod["namespace"] == service.metadata.namespace:
94
+ service_selector: dict[str, str] | None = service.spec.selector
95
+ pod_labels: dict[str, str] | None = json.loads(pod["labels"])
96
+
97
+ # check if pod labels match service selector
98
+ if pod_labels and service_selector:
99
+ if all(
100
+ service_selector[key] == pod_labels.get(key)
101
+ for key in service_selector
102
+ ):
103
+ pod_ids.append(pod["uid"])
104
+
105
+ item["pod_ids"] = pod_ids
106
+
107
+ services_list.append(item)
108
+ return services_list
109
+
110
+
111
+ def load_services(
112
+ session: neo4j.Session,
113
+ services: list[dict[str, Any]],
114
+ update_tag: int,
115
+ cluster_id: str,
116
+ cluster_name: str,
117
+ ) -> None:
118
+ logger.info(f"Loading {len(services)} KubernetesServices")
119
+ load(
120
+ session,
121
+ KubernetesServiceSchema(),
122
+ services,
123
+ lastupdated=update_tag,
124
+ CLUSTER_ID=cluster_id,
125
+ CLUSTER_NAME=cluster_name,
126
+ )
127
+
128
+
129
+ def cleanup(session: neo4j.Session, common_job_parameters: dict[str, Any]) -> None:
130
+ logger.debug("Running cleanup job for KubernetesService")
131
+ cleanup_job = GraphJob.from_node_schema(
132
+ KubernetesServiceSchema(), common_job_parameters
133
+ )
134
+ cleanup_job.run(session)
135
+
136
+
137
+ @timeit
138
+ def sync_services(
139
+ session: neo4j.Session,
140
+ client: K8sClient,
141
+ all_pods: list[dict[str, Any]],
142
+ update_tag: int,
143
+ common_job_parameters: dict[str, Any],
144
+ ) -> None:
145
+ services = get_services(client)
146
+ transformed_services = transform_services(services, all_pods)
147
+ load_services(
148
+ session=session,
149
+ services=transformed_services,
150
+ update_tag=update_tag,
151
+ cluster_id=common_job_parameters["CLUSTER_ID"],
152
+ cluster_name=client.name,
153
+ )
154
+ cleanup(session, common_job_parameters)
@@ -1,11 +1,16 @@
1
+ import logging
1
2
  from datetime import datetime
2
- from typing import List
3
- from typing import Union
3
+ from typing import Any
4
+ from typing import Callable
4
5
 
5
6
  from kubernetes import config
6
7
  from kubernetes.client import ApiClient
7
8
  from kubernetes.client import CoreV1Api
8
9
  from kubernetes.client import NetworkingV1Api
10
+ from kubernetes.client import VersionApi
11
+ from kubernetes.client.exceptions import ApiException
12
+
13
+ logger = logging.getLogger(__name__)
9
14
 
10
15
 
11
16
  class KubernetesContextNotFound(Exception):
@@ -13,39 +18,145 @@ class KubernetesContextNotFound(Exception):
13
18
 
14
19
 
15
20
  class K8CoreApiClient(CoreV1Api):
16
- def __init__(self, name: str, api_client: ApiClient = None) -> None:
21
+ def __init__(
22
+ self,
23
+ name: str,
24
+ config_file: str,
25
+ api_client: ApiClient | None = None,
26
+ ) -> None:
17
27
  self.name = name
18
28
  if not api_client:
19
- api_client = config.new_client_from_config(context=name)
29
+ api_client = config.new_client_from_config(
30
+ context=name, config_file=config_file
31
+ )
20
32
  super().__init__(api_client=api_client)
21
33
 
22
34
 
23
35
  class K8NetworkingApiClient(NetworkingV1Api):
24
- def __init__(self, name: str, api_client: ApiClient = None) -> None:
36
+ def __init__(
37
+ self,
38
+ name: str,
39
+ config_file: str,
40
+ api_client: ApiClient | None = None,
41
+ ) -> None:
42
+ self.name = name
43
+ if not api_client:
44
+ api_client = config.new_client_from_config(
45
+ context=name, config_file=config_file
46
+ )
47
+ super().__init__(api_client=api_client)
48
+
49
+
50
+ class K8VersionApiClient(VersionApi):
51
+ def __init__(
52
+ self,
53
+ name: str,
54
+ config_file: str,
55
+ api_client: ApiClient | None = None,
56
+ ) -> None:
25
57
  self.name = name
26
58
  if not api_client:
27
- api_client = config.new_client_from_config(context=name)
59
+ api_client = config.new_client_from_config(
60
+ context=name, config_file=config_file
61
+ )
28
62
  super().__init__(api_client=api_client)
29
63
 
30
64
 
31
65
  class K8sClient:
32
- def __init__(self, name: str) -> None:
66
+ def __init__(
67
+ self,
68
+ name: str,
69
+ config_file: str,
70
+ external_id: str | None = None,
71
+ ) -> None:
33
72
  self.name = name
34
- self.core = K8CoreApiClient(self.name)
35
- self.networking = K8NetworkingApiClient(self.name)
73
+ self.config_file = config_file
74
+ self.external_id = external_id
75
+ self.core = K8CoreApiClient(self.name, self.config_file)
76
+ self.networking = K8NetworkingApiClient(self.name, self.config_file)
77
+ self.version = K8VersionApiClient(self.name, self.config_file)
36
78
 
37
79
 
38
- def get_k8s_clients(kubeconfig: str) -> List[K8sClient]:
80
+ def get_k8s_clients(kubeconfig: str) -> list[K8sClient]:
81
+ # returns a tuple of (all contexts, current context)
39
82
  contexts, _ = config.list_kube_config_contexts(kubeconfig)
40
83
  if not contexts:
41
84
  raise KubernetesContextNotFound("No context found in kubeconfig.")
42
- clients = list()
85
+
86
+ clients = []
43
87
  for context in contexts:
44
- clients.append(K8sClient(context["name"]))
88
+ clients.append(
89
+ K8sClient(
90
+ context["name"],
91
+ kubeconfig,
92
+ external_id=context["context"].get("cluster"),
93
+ ),
94
+ )
45
95
  return clients
46
96
 
47
97
 
48
- def get_epoch(date: datetime) -> Union[int, None]:
98
+ def get_epoch(date: datetime | None) -> int | None:
49
99
  if date:
50
- return int(date.strftime("%s"))
100
+ return int(date.timestamp())
51
101
  return None
102
+
103
+
104
+ def k8s_paginate(
105
+ list_func: Callable,
106
+ **kwargs: Any,
107
+ ) -> list[dict[str, Any]]:
108
+ """
109
+ Handles pagination for a Kubernetes API call.
110
+
111
+ :param list_func: The list function to call (e.g. client.core.list_pod_for_all_namespaces)
112
+ :param kwargs: Keyword arguments to pass to the list function (e.g. limit=100)
113
+ :return: A list of all resources returned by the list function
114
+ """
115
+ all_resources = []
116
+ continue_token = None
117
+ limit = kwargs.pop("limit", 100)
118
+ function_name = list_func.__name__
119
+
120
+ logger.debug(f"Starting pagination for {function_name} with limit {limit}.")
121
+
122
+ while True:
123
+ try:
124
+ if continue_token:
125
+ response = list_func(limit=limit, _continue=continue_token, **kwargs)
126
+ else:
127
+ response = list_func(limit=limit, **kwargs)
128
+
129
+ # Check if items exists on the response
130
+ if not hasattr(response, "items"):
131
+ logger.warning(
132
+ f"Response from {function_name} does not contain 'items' attribute."
133
+ )
134
+ break
135
+
136
+ items_count = len(response.items)
137
+ all_resources.extend(response.items)
138
+
139
+ logger.debug(f"Retrieved {items_count} {function_name} resources")
140
+
141
+ # Check if metadata exists on the response
142
+ if not hasattr(response, "metadata"):
143
+ logger.warning(
144
+ f"Response from {function_name} does not contain 'metadata' attribute."
145
+ )
146
+ break
147
+
148
+ continue_token = response.metadata._continue
149
+ if not continue_token:
150
+ logger.debug(f"No more {function_name} resources to retrieve.")
151
+ break
152
+
153
+ except ApiException as e:
154
+ logger.error(
155
+ f"Kubernetes API error retrieving {function_name} resources. {e}: {e.status} - {e.reason}"
156
+ )
157
+ break
158
+
159
+ logger.debug(
160
+ f"Completed pagination for {function_name}: retrieved {len(all_resources)} resources"
161
+ )
162
+ return all_resources
@@ -0,0 +1,161 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import boto3
5
+ from neo4j import Session
6
+
7
+ from cartography.client.aws import list_accounts
8
+ from cartography.client.aws.ecr import get_ecr_images
9
+ from cartography.config import Config
10
+ from cartography.intel.trivy.scanner import cleanup
11
+ from cartography.intel.trivy.scanner import get_json_files_in_s3
12
+ from cartography.intel.trivy.scanner import sync_single_image_from_s3
13
+ from cartography.stats import get_stats_client
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+ stat_handler = get_stats_client("trivy.scanner")
18
+
19
+
20
+ @timeit
21
+ def get_scan_targets(
22
+ neo4j_session: Session,
23
+ account_ids: list[str] | None = None,
24
+ ) -> set[str]:
25
+ """
26
+ Return the set of ECR image URIs for the given account IDs, or for all accounts in the graph when no account IDs are given.
27
+ """
28
+ if not account_ids:
29
+ aws_accounts = list_accounts(neo4j_session)
30
+ else:
31
+ aws_accounts = account_ids
32
+
33
+ ecr_images: set[str] = set()
34
+ for account_id in aws_accounts:
35
+ for _, _, image_uri, _, _ in get_ecr_images(neo4j_session, account_id):
36
+ ecr_images.add(image_uri)
37
+
38
+ return ecr_images
39
+
40
+
41
+ def _get_intersection(
42
+ images_in_graph: set[str], json_files: set[str], trivy_s3_prefix: str
43
+ ) -> list[tuple[str, str]]:
44
+ """
45
+ Get the intersection of ECR images in the graph and S3 scan results.
46
+
47
+ Args:
48
+ images_in_graph: Set of ECR images in the graph
49
+ json_files: Set of S3 object keys for JSON files
50
+ trivy_s3_prefix: S3 prefix path containing scan results
51
+
52
+ Returns:
53
+ List of tuples (image_uri, s3_object_key)
54
+ """
55
+ intersection = []
56
+ prefix_len = len(trivy_s3_prefix)
57
+ for s3_object_key in json_files:
58
+ # Sample key "123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
59
+ # Sample key "folder/derp/123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
60
+ # Remove the prefix and the .json suffix
61
+ image_uri = s3_object_key[prefix_len:-5]
62
+
63
+ if image_uri in images_in_graph:
64
+ intersection.append((image_uri, s3_object_key))
65
+
66
+ return intersection
67
+
68
+
69
+ @timeit
70
+ def sync_trivy_aws_ecr_from_s3(
71
+ neo4j_session: Session,
72
+ trivy_s3_bucket: str,
73
+ trivy_s3_prefix: str,
74
+ update_tag: int,
75
+ common_job_parameters: dict[str, Any],
76
+ boto3_session: boto3.Session,
77
+ ) -> None:
78
+ """
79
+ Sync Trivy scan results from S3 for AWS ECR images.
80
+
81
+ Args:
82
+ neo4j_session: Neo4j session for database operations
83
+ trivy_s3_bucket: S3 bucket containing scan results
84
+ trivy_s3_prefix: S3 prefix path containing scan results
85
+ update_tag: Update tag for tracking
86
+ common_job_parameters: Common job parameters for cleanup
87
+ boto3_session: boto3 session for S3 operations
88
+ """
89
+ logger.info(
90
+ f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
91
+ )
92
+
93
+ images_in_graph: set[str] = get_scan_targets(neo4j_session)
94
+ json_files: set[str] = get_json_files_in_s3(
95
+ trivy_s3_bucket, trivy_s3_prefix, boto3_session
96
+ )
97
+ intersection: list[tuple[str, str]] = _get_intersection(
98
+ images_in_graph, json_files, trivy_s3_prefix
99
+ )
100
+
101
+ if len(intersection) == 0:
102
+ logger.error(
103
+ f"Trivy sync was configured, but there are no ECR images with S3 json scan results in bucket "
104
+ f"'{trivy_s3_bucket}' with prefix '{trivy_s3_prefix}'. "
105
+ "Skipping Trivy sync to avoid potential data loss. "
106
+ "Please check the S3 bucket and prefix configuration. We expect the json files in s3 to be named "
107
+ f"`<image_uri>.json` and to be in the same bucket and prefix as the scan results. If the prefix is "
108
+ "a folder, it MUST end with a trailing slash '/'. "
109
+ )
110
+ logger.error(f"JSON files in S3: {json_files}")
111
+ raise ValueError("No ECR images with S3 json scan results found.")
112
+
113
+ logger.info(f"Processing {len(intersection)} ECR images with S3 scan results")
114
+ for image_uri, s3_object_key in intersection:
115
+ sync_single_image_from_s3(
116
+ neo4j_session,
117
+ image_uri,
118
+ update_tag,
119
+ trivy_s3_bucket,
120
+ s3_object_key,
121
+ boto3_session,
122
+ )
123
+
124
+ cleanup(neo4j_session, common_job_parameters)
125
+
126
+
127
+ @timeit
128
+ def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
129
+ """
130
+ Start Trivy scan ingestion from S3.
131
+
132
+ Args:
133
+ neo4j_session: Neo4j session for database operations
134
+ config: Configuration object containing S3 settings
135
+ """
136
+ # Check if S3 configuration is provided
137
+ if not config.trivy_s3_bucket:
138
+ logger.info("Trivy S3 configuration not provided. Skipping Trivy ingestion.")
139
+ return
140
+
141
+ # Default to empty string if s3 prefix is not provided
142
+ if config.trivy_s3_prefix is None:
143
+ config.trivy_s3_prefix = ""
144
+
145
+ common_job_parameters = {
146
+ "UPDATE_TAG": config.update_tag,
147
+ }
148
+
149
+ # Create the boto3 session used to read the Trivy scan results from S3
150
+ boto3_session = boto3.Session()
151
+
152
+ sync_trivy_aws_ecr_from_s3(
153
+ neo4j_session,
154
+ config.trivy_s3_bucket,
155
+ config.trivy_s3_prefix,
156
+ config.update_tag,
157
+ common_job_parameters,
158
+ boto3_session,
159
+ )
160
+
161
+ # Support other Trivy resource types here e.g. if Google Cloud has images.