cartography 0.104.0rc3__py3-none-any.whl → 0.106.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (134) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +104 -3
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/client/core/tx.py +62 -0
  6. cartography/config.py +32 -0
  7. cartography/data/indexes.cypher +0 -37
  8. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
  9. cartography/driftdetect/cli.py +3 -2
  10. cartography/graph/cleanupbuilder.py +198 -41
  11. cartography/graph/job.py +42 -0
  12. cartography/graph/querybuilder.py +136 -2
  13. cartography/graph/statement.py +1 -1
  14. cartography/intel/airbyte/__init__.py +105 -0
  15. cartography/intel/airbyte/connections.py +120 -0
  16. cartography/intel/airbyte/destinations.py +81 -0
  17. cartography/intel/airbyte/organizations.py +59 -0
  18. cartography/intel/airbyte/sources.py +78 -0
  19. cartography/intel/airbyte/tags.py +64 -0
  20. cartography/intel/airbyte/users.py +106 -0
  21. cartography/intel/airbyte/util.py +122 -0
  22. cartography/intel/airbyte/workspaces.py +63 -0
  23. cartography/intel/aws/acm.py +124 -0
  24. cartography/intel/aws/cloudtrail.py +3 -38
  25. cartography/intel/aws/codebuild.py +132 -0
  26. cartography/intel/aws/ecr.py +8 -2
  27. cartography/intel/aws/ecs.py +228 -380
  28. cartography/intel/aws/efs.py +179 -11
  29. cartography/intel/aws/iam.py +1 -1
  30. cartography/intel/aws/identitycenter.py +14 -3
  31. cartography/intel/aws/inspector.py +96 -53
  32. cartography/intel/aws/lambda_function.py +1 -1
  33. cartography/intel/aws/rds.py +2 -1
  34. cartography/intel/aws/resources.py +4 -0
  35. cartography/intel/aws/s3.py +195 -4
  36. cartography/intel/aws/sqs.py +36 -90
  37. cartography/intel/entra/__init__.py +22 -0
  38. cartography/intel/entra/applications.py +366 -0
  39. cartography/intel/entra/groups.py +151 -0
  40. cartography/intel/entra/ou.py +21 -5
  41. cartography/intel/entra/users.py +84 -42
  42. cartography/intel/kubernetes/__init__.py +30 -14
  43. cartography/intel/kubernetes/clusters.py +86 -0
  44. cartography/intel/kubernetes/namespaces.py +59 -57
  45. cartography/intel/kubernetes/pods.py +140 -77
  46. cartography/intel/kubernetes/secrets.py +95 -45
  47. cartography/intel/kubernetes/services.py +131 -67
  48. cartography/intel/kubernetes/util.py +125 -14
  49. cartography/intel/scaleway/__init__.py +127 -0
  50. cartography/intel/scaleway/iam/__init__.py +0 -0
  51. cartography/intel/scaleway/iam/apikeys.py +71 -0
  52. cartography/intel/scaleway/iam/applications.py +71 -0
  53. cartography/intel/scaleway/iam/groups.py +71 -0
  54. cartography/intel/scaleway/iam/users.py +71 -0
  55. cartography/intel/scaleway/instances/__init__.py +0 -0
  56. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  57. cartography/intel/scaleway/instances/instances.py +92 -0
  58. cartography/intel/scaleway/projects.py +79 -0
  59. cartography/intel/scaleway/storage/__init__.py +0 -0
  60. cartography/intel/scaleway/storage/snapshots.py +86 -0
  61. cartography/intel/scaleway/storage/volumes.py +84 -0
  62. cartography/intel/scaleway/utils.py +37 -0
  63. cartography/intel/trivy/__init__.py +161 -0
  64. cartography/intel/trivy/scanner.py +363 -0
  65. cartography/models/airbyte/__init__.py +0 -0
  66. cartography/models/airbyte/connection.py +138 -0
  67. cartography/models/airbyte/destination.py +75 -0
  68. cartography/models/airbyte/organization.py +19 -0
  69. cartography/models/airbyte/source.py +75 -0
  70. cartography/models/airbyte/stream.py +74 -0
  71. cartography/models/airbyte/tag.py +69 -0
  72. cartography/models/airbyte/user.py +111 -0
  73. cartography/models/airbyte/workspace.py +46 -0
  74. cartography/models/aws/acm/__init__.py +0 -0
  75. cartography/models/aws/acm/certificate.py +75 -0
  76. cartography/models/aws/cloudtrail/trail.py +24 -0
  77. cartography/models/aws/codebuild/__init__.py +0 -0
  78. cartography/models/aws/codebuild/project.py +49 -0
  79. cartography/models/aws/ecs/__init__.py +0 -0
  80. cartography/models/aws/ecs/clusters.py +64 -0
  81. cartography/models/aws/ecs/container_definitions.py +93 -0
  82. cartography/models/aws/ecs/container_instances.py +84 -0
  83. cartography/models/aws/ecs/containers.py +99 -0
  84. cartography/models/aws/ecs/services.py +117 -0
  85. cartography/models/aws/ecs/task_definitions.py +135 -0
  86. cartography/models/aws/ecs/tasks.py +110 -0
  87. cartography/models/aws/efs/access_point.py +77 -0
  88. cartography/models/aws/efs/file_system.py +60 -0
  89. cartography/models/aws/efs/mount_target.py +29 -2
  90. cartography/models/aws/s3/notification.py +24 -0
  91. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  92. cartography/models/aws/sqs/__init__.py +0 -0
  93. cartography/models/aws/sqs/queue.py +89 -0
  94. cartography/models/core/common.py +1 -0
  95. cartography/models/core/nodes.py +15 -2
  96. cartography/models/core/relationships.py +44 -0
  97. cartography/models/entra/app_role_assignment.py +115 -0
  98. cartography/models/entra/application.py +47 -0
  99. cartography/models/entra/group.py +91 -0
  100. cartography/models/entra/user.py +17 -51
  101. cartography/models/kubernetes/__init__.py +0 -0
  102. cartography/models/kubernetes/clusters.py +26 -0
  103. cartography/models/kubernetes/containers.py +108 -0
  104. cartography/models/kubernetes/namespaces.py +51 -0
  105. cartography/models/kubernetes/pods.py +80 -0
  106. cartography/models/kubernetes/secrets.py +79 -0
  107. cartography/models/kubernetes/services.py +108 -0
  108. cartography/models/scaleway/__init__.py +0 -0
  109. cartography/models/scaleway/iam/__init__.py +0 -0
  110. cartography/models/scaleway/iam/apikey.py +96 -0
  111. cartography/models/scaleway/iam/application.py +52 -0
  112. cartography/models/scaleway/iam/group.py +95 -0
  113. cartography/models/scaleway/iam/user.py +60 -0
  114. cartography/models/scaleway/instance/__init__.py +0 -0
  115. cartography/models/scaleway/instance/flexibleip.py +52 -0
  116. cartography/models/scaleway/instance/instance.py +118 -0
  117. cartography/models/scaleway/organization.py +19 -0
  118. cartography/models/scaleway/project.py +48 -0
  119. cartography/models/scaleway/storage/__init__.py +0 -0
  120. cartography/models/scaleway/storage/snapshot.py +78 -0
  121. cartography/models/scaleway/storage/volume.py +51 -0
  122. cartography/models/trivy/__init__.py +0 -0
  123. cartography/models/trivy/findings.py +66 -0
  124. cartography/models/trivy/fix.py +66 -0
  125. cartography/models/trivy/package.py +71 -0
  126. cartography/sync.py +10 -4
  127. cartography/util.py +15 -10
  128. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/METADATA +6 -2
  129. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/RECORD +133 -49
  130. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  131. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/WHEEL +0 -0
  132. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/entry_points.txt +0 -0
  133. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/licenses/LICENSE +0 -0
  134. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,124 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import boto3
5
+ import neo4j
6
+
7
+ from cartography.client.core.tx import load
8
+ from cartography.graph.job import GraphJob
9
+ from cartography.models.aws.acm.certificate import ACMCertificateSchema
10
+ from cartography.stats import get_stats_client
11
+ from cartography.util import aws_handle_regions
12
+ from cartography.util import merge_module_sync_metadata
13
+ from cartography.util import timeit
14
+
15
+ logger = logging.getLogger(__name__)
16
+ stat_handler = get_stats_client(__name__)
17
+
18
+
19
@timeit
@aws_handle_regions
def get_acm_certificates(
    boto3_session: boto3.session.Session, region: str
) -> list[dict[str, Any]]:
    """
    Fetch the full description of every ACM certificate in one region.

    Pages through ``list_certificates`` to gather the certificate summaries,
    then issues one ``describe_certificate`` call per summary ARN.

    :param boto3_session: session used to build the regional ``acm`` client
    :param region: name of the AWS region to query
    :return: the ``Certificate`` dict of each describe response, in listing order
    """
    acm = boto3_session.client("acm", region_name=region)
    pages = acm.get_paginator("list_certificates").paginate()

    # Drain the paginator completely before describing anything.
    cert_summaries: list[dict[str, Any]] = [
        entry for page in pages for entry in page.get("CertificateSummaryList", [])
    ]

    # The summaries only carry a subset of fields; describe each certificate by ARN.
    described: list[dict[str, Any]] = [
        acm.describe_certificate(CertificateArn=entry["CertificateArn"])["Certificate"]
        for entry in cert_summaries
    ]
    return described
36
+
37
+
38
def transform_acm_certificates(
    certificates: list[dict[str, Any]], region: str
) -> list[dict[str, Any]]:
    """
    Flatten described ACM certificates into the dicts that get loaded into the graph.

    Each output dict carries ``Arn`` (required ``CertificateArn`` of the input), the
    optional describe fields copied via ``.get`` (``None`` when absent), ``InUseBy``
    (defaults to an empty list) and ``Region``. ``ELBV2ListenerArns`` is added only
    when at least one ``InUseBy`` entry contains ``":listener/"``.

    :param certificates: certificate dicts as returned by ``describe_certificate``
    :param region: region name stamped onto every output dict
    :return: one transformed dict per input certificate, in input order
    """
    copied_fields = (
        "DomainName",
        "Type",
        "Status",
        "KeyAlgorithm",
        "SignatureAlgorithm",
        "NotBefore",
        "NotAfter",
    )

    results: list[dict[str, Any]] = []
    for certificate in certificates:
        record: dict[str, Any] = {"Arn": certificate["CertificateArn"]}
        record.update((field, certificate.get(field)) for field in copied_fields)
        record["InUseBy"] = certificate.get("InUseBy", [])
        record["Region"] = region

        # Keep only the ELBV2 listener ARNs; they are used for relationship creation.
        elbv2_listeners = [
            consumer for consumer in record["InUseBy"] if ":listener/" in consumer
        ]
        if elbv2_listeners:
            record["ELBV2ListenerArns"] = elbv2_listeners

        results.append(record)
    return results
61
+
62
+
63
@timeit
def load_acm_certificates(
    neo4j_session: neo4j.Session,
    data: list[dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    update_tag: int,
) -> None:
    """
    Pass transformed ACM certificates to ``load`` using ``ACMCertificateSchema``.

    :param neo4j_session: session handed through to ``load``
    :param data: transformed certificate dicts
    :param region: region name, forwarded as the ``Region`` keyword
    :param current_aws_account_id: account id, forwarded as the ``AWS_ID`` keyword
    :param update_tag: sync timestamp, forwarded as the ``lastupdated`` keyword
    """
    logger.info(f"Loading {len(data)} ACM certificates for region {region} into graph.")

    load_kwargs: dict[str, Any] = {
        "lastupdated": update_tag,
        "Region": region,
        "AWS_ID": current_aws_account_id,
    }
    load(neo4j_session, ACMCertificateSchema(), data, **load_kwargs)
80
+
81
+
82
@timeit
def cleanup_acm_certificates(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """
    Build the cleanup job derived from ``ACMCertificateSchema`` and run it.

    :param neo4j_session: session the job is run against
    :param common_job_parameters: parameters handed to ``GraphJob.from_node_schema``
    """
    logger.debug("Running ACM certificate cleanup job.")
    cleanup_job = GraphJob.from_node_schema(
        ACMCertificateSchema(),
        common_job_parameters,
    )
    cleanup_job.run(neo4j_session)
90
+
91
+
92
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: list[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync ACM certificates for every requested region of one AWS account.

    Per region: get the certificates, transform them and load them. After all
    regions are processed, the cleanup job runs once and the module sync metadata
    for ``ACMCertificate`` is merged under the ``AWSAccount`` group.

    :param neo4j_session: session used for loading, cleanup and sync metadata
    :param boto3_session: session used to query ACM
    :param regions: region names to iterate over
    :param current_aws_account_id: id of the account being synced
    :param update_tag: sync timestamp passed to the load and metadata steps
    :param common_job_parameters: parameters passed to the cleanup job
    """
    for region in regions:
        logger.info(
            f"Syncing ACM certificates for region {region} in account {current_aws_account_id}."
        )
        raw_certificates = get_acm_certificates(boto3_session, region)
        load_acm_certificates(
            neo4j_session,
            transform_acm_certificates(raw_certificates, region),
            region,
            current_aws_account_id,
            update_tag,
        )

    # Cleanup runs once, after every region has been loaded.
    cleanup_acm_certificates(neo4j_session, common_job_parameters)

    sync_metadata: dict[str, Any] = {
        "group_type": "AWSAccount",
        "group_id": current_aws_account_id,
        "synced_type": "ACMCertificate",
        "update_tag": update_tag,
        "stat_handler": stat_handler,
    }
    merge_module_sync_metadata(neo4j_session, **sync_metadata)
@@ -4,7 +4,6 @@ from typing import Dict
4
4
  from typing import List
5
5
 
6
6
  import boto3
7
- import botocore.exceptions
8
7
  import neo4j
9
8
 
10
9
  from cartography.client.core.tx import load
@@ -25,10 +24,8 @@ def get_cloudtrail_trails(
25
24
  client = boto3_session.client(
26
25
  "cloudtrail", region_name=region, config=get_botocore_config()
27
26
  )
28
- paginator = client.get_paginator("list_trails")
29
- trails = []
30
- for page in paginator.paginate():
31
- trails.extend(page["Trails"])
27
+
28
+ trails = client.describe_trails()["trailList"]
32
29
 
33
30
  # CloudTrail multi-region trails are shown in list_trails,
34
31
  # but the get_trail call only works in the home region
@@ -36,28 +33,6 @@ def get_cloudtrail_trails(
36
33
  return trails_filtered
37
34
 
38
35
 
39
- @timeit
40
- def get_cloudtrail_trail(
41
- boto3_session: boto3.Session,
42
- region: str,
43
- trail_name: str,
44
- ) -> Dict[str, Any]:
45
- client = boto3_session.client(
46
- "cloudtrail", region_name=region, config=get_botocore_config()
47
- )
48
- trail_details: Dict[str, Any] = {}
49
- try:
50
- response = client.get_trail(Name=trail_name)
51
- trail_details = response["Trail"]
52
- except botocore.exceptions.ClientError as e:
53
- code = e.response["Error"]["Code"]
54
- msg = e.response["Error"]["Message"]
55
- logger.warning(
56
- f"Could not run CloudTrail get_trail due to boto3 error {code}: {msg}. Skipping.",
57
- )
58
- return trail_details
59
-
60
-
61
36
  @timeit
62
37
  def load_cloudtrail_trails(
63
38
  neo4j_session: neo4j.Session,
@@ -105,20 +80,10 @@ def sync(
105
80
  f"Syncing CloudTrail for region '{region}' in account '{current_aws_account_id}'.",
106
81
  )
107
82
  trails = get_cloudtrail_trails(boto3_session, region)
108
- trail_data: List[Dict[str, Any]] = []
109
- for trail in trails:
110
- trail_name = trail["Name"]
111
- trail_details = get_cloudtrail_trail(
112
- boto3_session,
113
- region,
114
- trail_name,
115
- )
116
- if trail_details:
117
- trail_data.append(trail_details)
118
83
 
119
84
  load_cloudtrail_trails(
120
85
  neo4j_session,
121
- trail_data,
86
+ trails,
122
87
  region,
123
88
  current_aws_account_id,
124
89
  update_tag,
@@ -0,0 +1,132 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.aws.ec2.util import get_botocore_config
12
+ from cartography.models.aws.codebuild.project import CodeBuildProjectSchema
13
+ from cartography.util import aws_handle_regions
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
@aws_handle_regions
def get_all_codebuild_projects(
    boto3_session: boto3.Session, region: str
) -> List[Dict[str, Any]]:
    """
    Fetch the details of every CodeBuild project in one region.

    Pages through ``list_projects`` for project names and resolves them with
    ``batch_get_projects`` in slices of at most 100 names.

    :param boto3_session: session used to build the regional ``codebuild`` client
    :param region: name of the AWS region to query
    :return: the ``projects`` entries of all batch responses, concatenated
    """
    codebuild = boto3_session.client(
        "codebuild", region_name=region, config=get_botocore_config()
    )
    name_pages = codebuild.get_paginator("list_projects").paginate()

    # AWS batch_get_projects accepts up to 100 project names per call as per AWS documentation.
    max_names_per_call = 100

    collected: List[Dict[str, Any]] = []
    for page in name_pages:
        names = page.get("projects", [])
        if names:
            offset = 0
            while offset < len(names):
                chunk = names[offset : offset + max_names_per_call]
                reply = codebuild.batch_get_projects(names=chunk)
                collected.extend(reply.get("projects", []))
                offset += max_names_per_call
    return collected
44
+
45
+
46
def transform_codebuild_projects(
    projects: List[Dict[str, Any]], region: str
) -> List[Dict[str, Any]]:
    """
    Transform CodeBuild project data for ingestion into Neo4j.

    Every environment variable is rendered as a ``name=value`` string:
    - Variables of type 'PLAINTEXT' keep their value.
    - Any other type (e.g., 'PARAMETER_STORE', 'SECRETS_MANAGER') gets '<REDACTED>'.

    :param projects: project dicts as returned by ``batch_get_projects``
    :param region: accepted for signature parity; not read by this function
    :return: one dict per project with ``arn``, ``created``, ``environmentVariables``,
        ``sourceType`` and ``sourceLocation``
    """

    def _render(variable: Dict[str, Any]) -> str:
        # Only plaintext values are exposed; everything else is masked.
        if variable.get("type") == "PLAINTEXT":
            shown = variable.get("value")
        else:
            shown = "<REDACTED>"
        return f"{variable.get('name')}={shown}"

    results = []
    for project in projects:
        environment = project.get("environment", {})
        source = project.get("source", {})
        results.append(
            {
                "arn": project["arn"],
                "created": project.get("created"),
                "environmentVariables": [
                    _render(variable)
                    for variable in environment.get("environmentVariables", [])
                ],
                "sourceType": source.get("type"),
                "sourceLocation": source.get("location"),
            }
        )

    return results
73
+
74
+
75
@timeit
def load_codebuild_projects(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """
    Pass transformed CodeBuild projects to ``load`` using ``CodeBuildProjectSchema``.

    :param neo4j_session: session handed through to ``load``
    :param data: transformed project dicts
    :param region: region name, forwarded as the ``Region`` keyword
    :param current_aws_account_id: account id, forwarded as the ``AWS_ID`` keyword
    :param aws_update_tag: sync timestamp, forwarded as the ``lastupdated`` keyword
    """
    logger.info(
        f"Loading CodeBuild {len(data)} projects for region '{region}' into graph.",
    )

    load_kwargs: Dict[str, Any] = {
        "lastupdated": aws_update_tag,
        "Region": region,
        "AWS_ID": current_aws_account_id,
    }
    load(neo4j_session, CodeBuildProjectSchema(), data, **load_kwargs)
94
+
95
+
96
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Build the cleanup job derived from ``CodeBuildProjectSchema`` and run it.

    :param neo4j_session: session the job is run against
    :param common_job_parameters: parameters handed to ``GraphJob.from_node_schema``
    """
    # The message previously said "Efs" (copied from the EFS module); this job
    # operates on the CodeBuild project schema, so the log line must say so.
    logger.debug("Running CodeBuild cleanup job.")
    GraphJob.from_node_schema(CodeBuildProjectSchema(), common_job_parameters).run(
        neo4j_session
    )
105
+
106
+
107
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync CodeBuild projects for every requested region of one AWS account.

    Per region: get the projects, transform them and load them. The cleanup job
    runs once after all regions have been processed.

    :param neo4j_session: session used for loading and cleanup
    :param boto3_session: session used to query CodeBuild
    :param regions: region names to iterate over
    :param current_aws_account_id: id of the account being synced
    :param update_tag: sync timestamp passed to the load step
    :param common_job_parameters: parameters passed to the cleanup job
    """
    for region in regions:
        logger.info(
            f"Syncing CodeBuild for region '{region}' in account '{current_aws_account_id}'.",
        )

        raw_projects = get_all_codebuild_projects(boto3_session, region)
        load_codebuild_projects(
            neo4j_session,
            transform_codebuild_projects(raw_projects, region),
            region,
            current_aws_account_id,
            update_tag,
        )

    # Cleanup runs once, after every region has been loaded.
    cleanup(neo4j_session, common_job_parameters)
@@ -107,9 +107,12 @@ def load_ecr_repositories(
107
107
  def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
108
108
  """
109
109
  Ensure that we only load ECRImage nodes to the graph if they have a defined imageDigest field.
110
+ Process repositories in a consistent order to handle overlapping image digests deterministically.
110
111
  """
111
112
  repo_images_list = []
112
- for repo_uri, repo_images in repo_data.items():
113
+ # Sort repository URIs to ensure consistent processing order
114
+ for repo_uri in sorted(repo_data.keys()):
115
+ repo_images = repo_data[repo_uri]
113
116
  for img in repo_images:
114
117
  if "imageDigest" in img and img["imageDigest"]:
115
118
  img["repo_uri"] = repo_uri
@@ -214,7 +217,9 @@ def _get_image_data(
214
217
  )
215
218
  image_data[repo["repositoryUri"]] = repo_image_obj
216
219
 
217
- to_synchronous(*[async_get_images(repo) for repo in repositories])
220
+ # Sort repositories by name to ensure consistent processing order
221
+ sorted_repos = sorted(repositories, key=lambda x: x["repositoryName"])
222
+ to_synchronous(*[async_get_images(repo) for repo in sorted_repos])
218
223
 
219
224
  return image_data
220
225
 
@@ -237,6 +242,7 @@ def sync(
237
242
  image_data = {}
238
243
  repositories = get_ecr_repositories(boto3_session, region)
239
244
  image_data = _get_image_data(boto3_session, region, repositories)
245
+ # len here should be 1!
240
246
  load_ecr_repositories(
241
247
  neo4j_session,
242
248
  repositories,