cartography 0.104.0rc3__py3-none-any.whl → 0.106.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (134) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +104 -3
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/client/core/tx.py +62 -0
  6. cartography/config.py +32 -0
  7. cartography/data/indexes.cypher +0 -37
  8. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
  9. cartography/driftdetect/cli.py +3 -2
  10. cartography/graph/cleanupbuilder.py +198 -41
  11. cartography/graph/job.py +42 -0
  12. cartography/graph/querybuilder.py +136 -2
  13. cartography/graph/statement.py +1 -1
  14. cartography/intel/airbyte/__init__.py +105 -0
  15. cartography/intel/airbyte/connections.py +120 -0
  16. cartography/intel/airbyte/destinations.py +81 -0
  17. cartography/intel/airbyte/organizations.py +59 -0
  18. cartography/intel/airbyte/sources.py +78 -0
  19. cartography/intel/airbyte/tags.py +64 -0
  20. cartography/intel/airbyte/users.py +106 -0
  21. cartography/intel/airbyte/util.py +122 -0
  22. cartography/intel/airbyte/workspaces.py +63 -0
  23. cartography/intel/aws/acm.py +124 -0
  24. cartography/intel/aws/cloudtrail.py +3 -38
  25. cartography/intel/aws/codebuild.py +132 -0
  26. cartography/intel/aws/ecr.py +8 -2
  27. cartography/intel/aws/ecs.py +228 -380
  28. cartography/intel/aws/efs.py +179 -11
  29. cartography/intel/aws/iam.py +1 -1
  30. cartography/intel/aws/identitycenter.py +14 -3
  31. cartography/intel/aws/inspector.py +96 -53
  32. cartography/intel/aws/lambda_function.py +1 -1
  33. cartography/intel/aws/rds.py +2 -1
  34. cartography/intel/aws/resources.py +4 -0
  35. cartography/intel/aws/s3.py +195 -4
  36. cartography/intel/aws/sqs.py +36 -90
  37. cartography/intel/entra/__init__.py +22 -0
  38. cartography/intel/entra/applications.py +366 -0
  39. cartography/intel/entra/groups.py +151 -0
  40. cartography/intel/entra/ou.py +21 -5
  41. cartography/intel/entra/users.py +84 -42
  42. cartography/intel/kubernetes/__init__.py +30 -14
  43. cartography/intel/kubernetes/clusters.py +86 -0
  44. cartography/intel/kubernetes/namespaces.py +59 -57
  45. cartography/intel/kubernetes/pods.py +140 -77
  46. cartography/intel/kubernetes/secrets.py +95 -45
  47. cartography/intel/kubernetes/services.py +131 -67
  48. cartography/intel/kubernetes/util.py +125 -14
  49. cartography/intel/scaleway/__init__.py +127 -0
  50. cartography/intel/scaleway/iam/__init__.py +0 -0
  51. cartography/intel/scaleway/iam/apikeys.py +71 -0
  52. cartography/intel/scaleway/iam/applications.py +71 -0
  53. cartography/intel/scaleway/iam/groups.py +71 -0
  54. cartography/intel/scaleway/iam/users.py +71 -0
  55. cartography/intel/scaleway/instances/__init__.py +0 -0
  56. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  57. cartography/intel/scaleway/instances/instances.py +92 -0
  58. cartography/intel/scaleway/projects.py +79 -0
  59. cartography/intel/scaleway/storage/__init__.py +0 -0
  60. cartography/intel/scaleway/storage/snapshots.py +86 -0
  61. cartography/intel/scaleway/storage/volumes.py +84 -0
  62. cartography/intel/scaleway/utils.py +37 -0
  63. cartography/intel/trivy/__init__.py +161 -0
  64. cartography/intel/trivy/scanner.py +363 -0
  65. cartography/models/airbyte/__init__.py +0 -0
  66. cartography/models/airbyte/connection.py +138 -0
  67. cartography/models/airbyte/destination.py +75 -0
  68. cartography/models/airbyte/organization.py +19 -0
  69. cartography/models/airbyte/source.py +75 -0
  70. cartography/models/airbyte/stream.py +74 -0
  71. cartography/models/airbyte/tag.py +69 -0
  72. cartography/models/airbyte/user.py +111 -0
  73. cartography/models/airbyte/workspace.py +46 -0
  74. cartography/models/aws/acm/__init__.py +0 -0
  75. cartography/models/aws/acm/certificate.py +75 -0
  76. cartography/models/aws/cloudtrail/trail.py +24 -0
  77. cartography/models/aws/codebuild/__init__.py +0 -0
  78. cartography/models/aws/codebuild/project.py +49 -0
  79. cartography/models/aws/ecs/__init__.py +0 -0
  80. cartography/models/aws/ecs/clusters.py +64 -0
  81. cartography/models/aws/ecs/container_definitions.py +93 -0
  82. cartography/models/aws/ecs/container_instances.py +84 -0
  83. cartography/models/aws/ecs/containers.py +99 -0
  84. cartography/models/aws/ecs/services.py +117 -0
  85. cartography/models/aws/ecs/task_definitions.py +135 -0
  86. cartography/models/aws/ecs/tasks.py +110 -0
  87. cartography/models/aws/efs/access_point.py +77 -0
  88. cartography/models/aws/efs/file_system.py +60 -0
  89. cartography/models/aws/efs/mount_target.py +29 -2
  90. cartography/models/aws/s3/notification.py +24 -0
  91. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  92. cartography/models/aws/sqs/__init__.py +0 -0
  93. cartography/models/aws/sqs/queue.py +89 -0
  94. cartography/models/core/common.py +1 -0
  95. cartography/models/core/nodes.py +15 -2
  96. cartography/models/core/relationships.py +44 -0
  97. cartography/models/entra/app_role_assignment.py +115 -0
  98. cartography/models/entra/application.py +47 -0
  99. cartography/models/entra/group.py +91 -0
  100. cartography/models/entra/user.py +17 -51
  101. cartography/models/kubernetes/__init__.py +0 -0
  102. cartography/models/kubernetes/clusters.py +26 -0
  103. cartography/models/kubernetes/containers.py +108 -0
  104. cartography/models/kubernetes/namespaces.py +51 -0
  105. cartography/models/kubernetes/pods.py +80 -0
  106. cartography/models/kubernetes/secrets.py +79 -0
  107. cartography/models/kubernetes/services.py +108 -0
  108. cartography/models/scaleway/__init__.py +0 -0
  109. cartography/models/scaleway/iam/__init__.py +0 -0
  110. cartography/models/scaleway/iam/apikey.py +96 -0
  111. cartography/models/scaleway/iam/application.py +52 -0
  112. cartography/models/scaleway/iam/group.py +95 -0
  113. cartography/models/scaleway/iam/user.py +60 -0
  114. cartography/models/scaleway/instance/__init__.py +0 -0
  115. cartography/models/scaleway/instance/flexibleip.py +52 -0
  116. cartography/models/scaleway/instance/instance.py +118 -0
  117. cartography/models/scaleway/organization.py +19 -0
  118. cartography/models/scaleway/project.py +48 -0
  119. cartography/models/scaleway/storage/__init__.py +0 -0
  120. cartography/models/scaleway/storage/snapshot.py +78 -0
  121. cartography/models/scaleway/storage/volume.py +51 -0
  122. cartography/models/trivy/__init__.py +0 -0
  123. cartography/models/trivy/findings.py +66 -0
  124. cartography/models/trivy/fix.py +66 -0
  125. cartography/models/trivy/package.py +71 -0
  126. cartography/sync.py +10 -4
  127. cartography/util.py +15 -10
  128. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/METADATA +6 -2
  129. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/RECORD +133 -49
  130. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  131. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/WHEEL +0 -0
  132. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/entry_points.txt +0 -0
  133. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/licenses/LICENSE +0 -0
  134. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,92 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import neo4j
5
+ import scaleway
6
+ from scaleway.instance.v1 import InstanceV1API
7
+ from scaleway.instance.v1 import Server
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.scaleway.utils import DEFAULT_ZONE
12
+ from cartography.intel.scaleway.utils import scaleway_obj_to_dict
13
+ from cartography.models.scaleway.instance.instance import ScalewayInstanceSchema
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
def sync(
    neo4j_session: neo4j.Session,
    client: scaleway.Client,
    common_job_parameters: dict[str, Any],
    org_id: str,
    projects_id: list[str],
    update_tag: int,
) -> None:
    """
    Sync Scaleway instances: fetch, transform, load, then run cleanup.

    Args:
        neo4j_session: Neo4j session used for loading and cleanup.
        client: Scaleway client used to query the Instance API.
        common_job_parameters: Base parameters handed to the cleanup jobs.
        org_id: Scaleway organization whose servers are listed.
        projects_id: Project ids for which cleanup is run.
        update_tag: Value stored as ``lastupdated`` on loaded data.
    """
    servers_by_project = transform_instances(get(client, org_id))
    load_instances(neo4j_session, servers_by_project, update_tag)
    cleanup(neo4j_session, projects_id, common_job_parameters)
32
+
33
+
34
@timeit
def get(
    client: scaleway.Client,
    org_id: str,
) -> list[Server]:
    """
    List all servers of the organization via the Instance v1 API.

    Only ``DEFAULT_ZONE`` is passed as the zone.
    """
    instance_api = InstanceV1API(client)
    servers = instance_api.list_servers_all(
        organization=org_id,
        zone=DEFAULT_ZONE,
    )
    return servers
41
+
42
+
43
def transform_instances(
    instances: list[Server],
) -> dict[str, list[dict[str, Any]]]:
    """
    Convert servers to dicts and group them by their project id.

    Each converted server gets two flattened fields:
    ``public_ips`` (list of public IP ids) and ``volumes_id`` (list of ids of
    the attached volumes).

    Args:
        instances: Servers as returned by the Instance API.

    Returns:
        Mapping of project id to the list of converted servers in it.
    """
    result: dict[str, list[dict[str, Any]]] = {}
    for instance in instances:
        project_id = instance.project
        formatted_instance = scaleway_obj_to_dict(instance)
        # scaleway_obj_to_dict replaces empty lists with None, so the key can
        # exist with a None value; a `.get(key, default)` default would not
        # apply in that case. Fall back explicitly to an empty container.
        public_ips = formatted_instance.get("public_ips") or []
        volumes = formatted_instance.get("volumes") or {}
        formatted_instance["public_ips"] = [ip["id"] for ip in public_ips]
        formatted_instance["volumes_id"] = [
            volume["id"] for volume in volumes.values()
        ]
        result.setdefault(project_id, []).append(formatted_instance)
    return result
58
+
59
+
60
@timeit
def load_instances(
    neo4j_session: neo4j.Session,
    data: dict[str, list[dict[str, Any]]],
    update_tag: int,
) -> None:
    """
    Load instances into Neo4j, one ``load`` call per project.

    Args:
        neo4j_session: Neo4j session passed to ``load``.
        data: Mapping of project id to the instances belonging to it.
        update_tag: Value passed as ``lastupdated``.
    """
    for project, project_instances in data.items():
        instance_count = len(project_instances)
        logger.info(
            "Loading %d Scaleway Instance in project '%s' into Neo4j.",
            instance_count,
            project,
        )
        # The project id is passed as the PROJECT_ID keyword for each batch.
        load(
            neo4j_session,
            ScalewayInstanceSchema(),
            project_instances,
            lastupdated=update_tag,
            PROJECT_ID=project,
        )
79
+
80
+
81
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    projects_id: list[str],
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Run the ``ScalewayInstanceSchema`` graph job once per project.

    Args:
        neo4j_session: Neo4j session the jobs run against.
        projects_id: Project ids to run the job for.
        common_job_parameters: Base job parameters; not modified, each job
            gets its own copy extended with ``PROJECT_ID``.
    """
    for project_id in projects_id:
        job_parameters = {**common_job_parameters, "PROJECT_ID": project_id}
        job = GraphJob.from_node_schema(ScalewayInstanceSchema(), job_parameters)
        job.run(neo4j_session)
@@ -0,0 +1,79 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import neo4j
5
+ import scaleway
6
+ from scaleway.account.v3 import AccountV3ProjectAPI
7
+ from scaleway.account.v3 import Project
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.scaleway.utils import scaleway_obj_to_dict
12
+ from cartography.models.scaleway.organization import ScalewayOrganizationSchema
13
+ from cartography.models.scaleway.project import ScalewayProjectSchema
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
def sync(
    neo4j_session: neo4j.Session,
    client: scaleway.Client,
    common_job_parameters: dict[str, Any],
    org_id: str,
    update_tag: int,
) -> list[dict]:
    """
    Sync Scaleway projects: fetch, transform, load, then run cleanup.

    Args:
        neo4j_session: Neo4j session used for loading and cleanup.
        client: Scaleway client used to query the Account API.
        common_job_parameters: Parameters handed to the cleanup job.
        org_id: Scaleway organization whose projects are listed.
        update_tag: Value stored as ``lastupdated`` on loaded data.

    Returns:
        The transformed projects (one dict per project).
    """
    project_dicts = transform_projects(get(client, org_id))
    load_projects(neo4j_session, project_dicts, org_id, update_tag)
    cleanup(neo4j_session, common_job_parameters)
    return project_dicts
32
+
33
+
34
@timeit
def get(
    client: scaleway.Client,
    org_id: str,
) -> list[Project]:
    """List all projects of the organization via the Account v3 project API."""
    project_api = AccountV3ProjectAPI(client)
    projects = project_api.list_projects_all(organization_id=org_id)
    return projects
41
+
42
+
43
def transform_projects(projects: list[Project]) -> list[dict[str, Any]]:
    """Convert each project to a dict with ``scaleway_obj_to_dict``, keeping order."""
    return [scaleway_obj_to_dict(project) for project in projects]
48
+
49
+
50
@timeit
def load_projects(
    neo4j_session: neo4j.Session,
    data: list[dict[str, Any]],
    org_id: str,
    update_tag: int,
) -> None:
    """
    Load the organization, then its projects, into Neo4j.

    Args:
        neo4j_session: Neo4j session passed to ``load``.
        data: Transformed projects to load.
        org_id: Organization id; loaded as a single-record organization batch
            and passed as ``ORG_ID`` when loading projects.
        update_tag: Value passed as ``lastupdated``.
    """
    # The organization is loaded first, from a record holding only its id.
    organization_records = [{"id": org_id}]
    load(
        neo4j_session,
        ScalewayOrganizationSchema(),
        organization_records,
        lastupdated=update_tag,
    )

    project_count = len(data)
    logger.info("Loading %d Scaleway Projects into Neo4j.", project_count)
    load(
        neo4j_session,
        ScalewayProjectSchema(),
        data,
        lastupdated=update_tag,
        ORG_ID=org_id,
    )
71
+
72
+
73
@timeit
def cleanup(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """Run the graph job built from ``ScalewayProjectSchema`` with the given parameters."""
    project_job = GraphJob.from_node_schema(
        ScalewayProjectSchema(),
        common_job_parameters,
    )
    project_job.run(neo4j_session)
File without changes
@@ -0,0 +1,86 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import neo4j
5
+ import scaleway
6
+ from scaleway.instance.v1 import InstanceV1API
7
+ from scaleway.instance.v1 import Snapshot
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.scaleway.utils import DEFAULT_ZONE
12
+ from cartography.intel.scaleway.utils import scaleway_obj_to_dict
13
+ from cartography.models.scaleway.storage.snapshot import ScalewayVolumeSnapshotSchema
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
def sync(
    neo4j_session: neo4j.Session,
    client: scaleway.Client,
    common_job_parameters: dict[str, Any],
    org_id: str,
    projects_id: list[str],
    update_tag: int,
) -> None:
    """
    Sync Scaleway snapshots: fetch, transform, load, then run cleanup.

    Args:
        neo4j_session: Neo4j session used for loading and cleanup.
        client: Scaleway client used to query the Instance API.
        common_job_parameters: Base parameters handed to the cleanup jobs.
        org_id: Scaleway organization whose snapshots are listed.
        projects_id: Project ids for which cleanup is run.
        update_tag: Value stored as ``lastupdated`` on loaded data.
    """
    grouped_snapshots = transform_snapshots(get(client, org_id))
    load_snapshots(neo4j_session, grouped_snapshots, update_tag)
    cleanup(neo4j_session, projects_id, common_job_parameters)
32
+
33
+
34
@timeit
def get(
    client: scaleway.Client,
    org_id: str,
) -> list[Snapshot]:
    """
    List all snapshots of the organization via the Instance v1 API.

    Only ``DEFAULT_ZONE`` is passed as the zone.
    """
    instance_api = InstanceV1API(client)
    snapshots = instance_api.list_snapshots_all(
        organization=org_id,
        zone=DEFAULT_ZONE,
    )
    return snapshots
41
+
42
+
43
def transform_snapshots(
    snapshots: list[Snapshot],
) -> dict[str, list[dict[str, Any]]]:
    """
    Convert snapshots to dicts and group them by their project id.

    Args:
        snapshots: Snapshots as returned by the Instance API.

    Returns:
        Mapping of project id to the converted snapshots in that project.
    """
    grouped: dict[str, list[dict[str, Any]]] = {}
    for item in snapshots:
        owner_project = item.project
        as_dict = scaleway_obj_to_dict(item)
        grouped.setdefault(owner_project, []).append(as_dict)
    return grouped
52
+
53
+
54
@timeit
def load_snapshots(
    neo4j_session: neo4j.Session,
    data: dict[str, list[dict[str, Any]]],
    update_tag: int,
) -> None:
    """
    Load snapshots into Neo4j, one ``load`` call per project.

    Args:
        neo4j_session: Neo4j session passed to ``load``.
        data: Mapping of project id to the snapshots belonging to it.
        update_tag: Value passed as ``lastupdated``.
    """
    for project, project_snapshots in data.items():
        snapshot_count = len(project_snapshots)
        logger.info(
            "Loading %d Scaleway InstanceSnapshots in project '%s' into Neo4j.",
            snapshot_count,
            project,
        )
        # The project id is passed as the PROJECT_ID keyword for each batch.
        load(
            neo4j_session,
            ScalewayVolumeSnapshotSchema(),
            project_snapshots,
            lastupdated=update_tag,
            PROJECT_ID=project,
        )
73
+
74
+
75
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    projects_id: list[str],
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Run the ``ScalewayVolumeSnapshotSchema`` graph job once per project.

    Args:
        neo4j_session: Neo4j session the jobs run against.
        projects_id: Project ids to run the job for.
        common_job_parameters: Base job parameters; not modified, each job
            gets its own copy extended with ``PROJECT_ID``.
    """
    for project_id in projects_id:
        job_parameters = {**common_job_parameters, "PROJECT_ID": project_id}
        job = GraphJob.from_node_schema(
            ScalewayVolumeSnapshotSchema(),
            job_parameters,
        )
        job.run(neo4j_session)
@@ -0,0 +1,84 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import neo4j
5
+ import scaleway
6
+ from scaleway.instance.v1 import InstanceV1API
7
+ from scaleway.instance.v1 import Volume
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.scaleway.utils import DEFAULT_ZONE
12
+ from cartography.intel.scaleway.utils import scaleway_obj_to_dict
13
+ from cartography.models.scaleway.storage.volume import ScalewayVolumeSchema
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
def sync(
    neo4j_session: neo4j.Session,
    client: scaleway.Client,
    common_job_parameters: dict[str, Any],
    org_id: str,
    projects_id: list[str],
    update_tag: int,
) -> None:
    """
    Sync Scaleway volumes: fetch, transform, load, then run cleanup.

    Args:
        neo4j_session: Neo4j session used for loading and cleanup.
        client: Scaleway client used to query the Instance API.
        common_job_parameters: Base parameters handed to the cleanup jobs.
        org_id: Scaleway organization whose volumes are listed.
        projects_id: Project ids for which cleanup is run.
        update_tag: Value stored as ``lastupdated`` on loaded data.
    """
    grouped_volumes = transform_volumes(get(client, org_id))
    load_volumes(neo4j_session, grouped_volumes, update_tag)
    cleanup(neo4j_session, projects_id, common_job_parameters)
32
+
33
+
34
@timeit
def get(
    client: scaleway.Client,
    org_id: str,
) -> list[Volume]:
    """
    List all volumes of the organization via the Instance v1 API.

    Only ``DEFAULT_ZONE`` is passed as the zone.
    """
    instance_api = InstanceV1API(client)
    volumes = instance_api.list_volumes_all(
        organization=org_id,
        zone=DEFAULT_ZONE,
    )
    return volumes
41
+
42
+
43
def transform_volumes(volumes: list[Volume]) -> dict[str, list[dict[str, Any]]]:
    """
    Convert volumes to dicts and group them by their project id.

    Args:
        volumes: Volumes as returned by the Instance API.

    Returns:
        Mapping of project id to the converted volumes in that project.
    """
    grouped: dict[str, list[dict[str, Any]]] = {}
    for item in volumes:
        owner_project = item.project
        as_dict = scaleway_obj_to_dict(item)
        grouped.setdefault(owner_project, []).append(as_dict)
    return grouped
50
+
51
+
52
@timeit
def load_volumes(
    neo4j_session: neo4j.Session,
    data: dict[str, list[dict[str, Any]]],
    update_tag: int,
) -> None:
    """
    Load volumes into Neo4j, one ``load`` call per project.

    Args:
        neo4j_session: Neo4j session passed to ``load``.
        data: Mapping of project id to the volumes belonging to it.
        update_tag: Value passed as ``lastupdated``.
    """
    for project, project_volumes in data.items():
        volume_count = len(project_volumes)
        logger.info(
            "Loading %d Scaleway InstanceVolumes in project '%s' into Neo4j.",
            volume_count,
            project,
        )
        # The project id is passed as the PROJECT_ID keyword for each batch.
        load(
            neo4j_session,
            ScalewayVolumeSchema(),
            project_volumes,
            lastupdated=update_tag,
            PROJECT_ID=project,
        )
71
+
72
+
73
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    projects_id: list[str],
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Run the ``ScalewayVolumeSchema`` graph job once per project.

    Args:
        neo4j_session: Neo4j session the jobs run against.
        projects_id: Project ids to run the job for.
        common_job_parameters: Base job parameters; not modified, each job
            gets its own copy extended with ``PROJECT_ID``.
    """
    for project_id in projects_id:
        job_parameters = {**common_job_parameters, "PROJECT_ID": project_id}
        job = GraphJob.from_node_schema(ScalewayVolumeSchema(), job_parameters)
        job.run(neo4j_session)
@@ -0,0 +1,37 @@
1
+ import dataclasses
2
+ from typing import Any
3
+
4
+ # Zone does not really matter for readonly access, but we need to set it
5
+ DEFAULT_ZONE = "fr-par-1"
6
+
7
+
8
def scaleway_obj_to_dict(obj: Any) -> dict[str, Any]:
    """
    Convert a Scaleway dataclass instance into a sanitized dictionary.

    The instance is expanded with ``dataclasses.asdict`` and every top-level
    value is then passed through ``_scaleway_element_sanitize``.

    Raises:
        TypeError: If ``obj`` is not a dataclass instance (dataclass types,
            plain dicts and lists are rejected).
    """
    is_dataclass_instance = dataclasses.is_dataclass(obj) and not isinstance(
        obj, type
    )
    if not is_dataclass_instance:
        raise TypeError(f"Expected a dataclass, got {type(obj).__name__} instead.")

    raw: dict[str, Any] = dataclasses.asdict(obj)
    return {key: _scaleway_element_sanitize(value) for key, value in raw.items()}


def _scaleway_element_sanitize(element: Any) -> Any:
    """
    Sanitize one value taken from a converted Scaleway object.

    - An empty string or an empty list becomes ``None``.
    - In a non-empty list, entries that are ``None`` are dropped and the rest
      are sanitized recursively.
    - In a dict, entries whose value is ``None`` are dropped and the remaining
      values are sanitized recursively.
    - A dataclass is converted with ``scaleway_obj_to_dict``.
    - Anything else is returned unchanged.

    Filtering happens before sanitizing, so a nested value that sanitizes to
    ``None`` (e.g. an empty string inside a list) stays in place as ``None``.
    """
    if isinstance(element, str):
        return None if element == "" else element

    if isinstance(element, list):
        if not element:
            return None
        kept_items = (item for item in element if item is not None)
        return [_scaleway_element_sanitize(item) for item in kept_items]

    if isinstance(element, dict):
        sanitized: dict[Any, Any] = {}
        for key, value in element.items():
            if value is None:
                continue
            sanitized[key] = _scaleway_element_sanitize(value)
        return sanitized

    if dataclasses.is_dataclass(element):
        return scaleway_obj_to_dict(element)

    return element
@@ -0,0 +1,161 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import boto3
5
+ from neo4j import Session
6
+
7
+ from cartography.client.aws import list_accounts
8
+ from cartography.client.aws.ecr import get_ecr_images
9
+ from cartography.config import Config
10
+ from cartography.intel.trivy.scanner import cleanup
11
+ from cartography.intel.trivy.scanner import get_json_files_in_s3
12
+ from cartography.intel.trivy.scanner import sync_single_image_from_s3
13
+ from cartography.stats import get_stats_client
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+ stat_handler = get_stats_client("trivy.scanner")
18
+
19
+
20
@timeit
def get_scan_targets(
    neo4j_session: Session,
    account_ids: list[str] | None = None,
) -> set[str]:
    """
    Return the set of ECR image URIs found in the graph.

    Args:
        neo4j_session: Neo4j session used for the lookups.
        account_ids: AWS account ids to look at. When empty or ``None``, the
            accounts returned by ``list_accounts`` are used instead.

    Returns:
        Set of image URIs, taken from the third field of each record yielded
        by ``get_ecr_images``.
    """
    aws_accounts = account_ids if account_ids else list_accounts(neo4j_session)

    return {
        image_uri
        for account_id in aws_accounts
        for _, _, image_uri, _, _ in get_ecr_images(neo4j_session, account_id)
    }
39
+
40
+
41
+ def _get_intersection(
42
+ images_in_graph: set[str], json_files: set[str], trivy_s3_prefix: str
43
+ ) -> list[tuple[str, str]]:
44
+ """
45
+ Get the intersection of ECR images in the graph and S3 scan results.
46
+
47
+ Args:
48
+ images_in_graph: Set of ECR images in the graph
49
+ json_files: Set of S3 object keys for JSON files
50
+ trivy_s3_prefix: S3 prefix path containing scan results
51
+
52
+ Returns:
53
+ List of tuples (image_uri, s3_object_key)
54
+ """
55
+ intersection = []
56
+ prefix_len = len(trivy_s3_prefix)
57
+ for s3_object_key in json_files:
58
+ # Sample key "123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
59
+ # Sample key "folder/derp/123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
60
+ # Remove the prefix and the .json suffix
61
+ image_uri = s3_object_key[prefix_len:-5]
62
+
63
+ if image_uri in images_in_graph:
64
+ intersection.append((image_uri, s3_object_key))
65
+
66
+ return intersection
67
+
68
+
69
@timeit
def sync_trivy_aws_ecr_from_s3(
    neo4j_session: Session,
    trivy_s3_bucket: str,
    trivy_s3_prefix: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
    boto3_session: boto3.Session,
) -> None:
    """
    Sync Trivy scan results stored in S3 for the ECR images known to the graph.

    Args:
        neo4j_session: Neo4j session for database operations
        trivy_s3_bucket: S3 bucket containing scan results
        trivy_s3_prefix: S3 prefix path containing scan results
        update_tag: Update tag for tracking
        common_job_parameters: Common job parameters for cleanup
        boto3_session: boto3 session for S3 operations

    Raises:
        ValueError: If no image in the graph has a matching JSON scan result
            in S3; cleanup is not run in that case.
    """
    logger.info(
        f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
    )

    images_in_graph: set[str] = get_scan_targets(neo4j_session)
    json_files: set[str] = get_json_files_in_s3(
        trivy_s3_bucket, trivy_s3_prefix, boto3_session
    )
    matches: list[tuple[str, str]] = _get_intersection(
        images_in_graph, json_files, trivy_s3_prefix
    )

    # Fail before any load or cleanup when nothing matches.
    if not matches:
        logger.error(
            "Trivy sync was configured, but there are no ECR images with S3 json scan results in bucket "
            f"'{trivy_s3_bucket}' with prefix '{trivy_s3_prefix}'. "
            "Skipping Trivy sync to avoid potential data loss. "
            "Please check the S3 bucket and prefix configuration. We expect the json files in s3 to be named "
            "`<image_uri>.json` and to be in the same bucket and prefix as the scan results. If the prefix is "
            "a folder, it MUST end with a trailing slash '/'. "
        )
        logger.error(f"JSON files in S3: {json_files}")
        raise ValueError("No ECR images with S3 json scan results found.")

    logger.info(f"Processing {len(matches)} ECR images with S3 scan results")
    for image_uri, s3_object_key in matches:
        sync_single_image_from_s3(
            neo4j_session,
            image_uri,
            update_tag,
            trivy_s3_bucket,
            s3_object_key,
            boto3_session,
        )

    cleanup(neo4j_session, common_job_parameters)
125
+
126
+
127
@timeit
def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
    """
    Entry point for ingesting Trivy scan results from S3.

    Does nothing (besides logging) when ``config.trivy_s3_bucket`` is not set.
    A ``None`` ``config.trivy_s3_prefix`` is replaced by ``""`` on the config
    object itself.

    Args:
        neo4j_session: Neo4j session for database operations
        config: Configuration object containing S3 settings
    """
    # Without a bucket there is nothing to ingest.
    if not config.trivy_s3_bucket:
        logger.info("Trivy S3 configuration not provided. Skipping Trivy ingestion.")
        return

    # A missing prefix means "no prefix"; this is written back onto config.
    if config.trivy_s3_prefix is None:
        config.trivy_s3_prefix = ""

    job_parameters = {"UPDATE_TAG": config.update_tag}

    # boto3 session used for the S3 operations.
    s3_session = boto3.Session()

    sync_trivy_aws_ecr_from_s3(
        neo4j_session,
        config.trivy_s3_bucket,
        config.trivy_s3_prefix,
        config.update_tag,
        job_parameters,
        s3_session,
    )

    # Support other Trivy resource types here e.g. if Google Cloud has images.