cartography 0.117.0__py3-none-any.whl → 0.118.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (57)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +11 -0
  3. cartography/config.py +5 -0
  4. cartography/graph/job.py +6 -2
  5. cartography/graph/statement.py +4 -0
  6. cartography/intel/aws/__init__.py +1 -0
  7. cartography/intel/aws/apigateway.py +18 -5
  8. cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
  9. cartography/intel/aws/ec2/internet_gateways.py +4 -2
  10. cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
  11. cartography/intel/aws/ec2/network_interfaces.py +4 -0
  12. cartography/intel/aws/ec2/reserved_instances.py +3 -1
  13. cartography/intel/aws/ec2/tgw.py +11 -5
  14. cartography/intel/aws/ec2/volumes.py +1 -1
  15. cartography/intel/aws/ecr.py +202 -26
  16. cartography/intel/aws/elasticsearch.py +13 -4
  17. cartography/intel/aws/identitycenter.py +93 -54
  18. cartography/intel/aws/inspector.py +26 -14
  19. cartography/intel/aws/permission_relationships.py +3 -3
  20. cartography/intel/aws/s3.py +26 -13
  21. cartography/intel/aws/ssm.py +3 -5
  22. cartography/intel/azure/compute.py +9 -4
  23. cartography/intel/azure/cosmosdb.py +31 -15
  24. cartography/intel/azure/sql.py +25 -12
  25. cartography/intel/azure/storage.py +19 -9
  26. cartography/intel/azure/subscription.py +3 -1
  27. cartography/intel/crowdstrike/spotlight.py +5 -2
  28. cartography/intel/entra/app_role_assignments.py +9 -2
  29. cartography/intel/gcp/__init__.py +26 -9
  30. cartography/intel/gcp/clients.py +8 -4
  31. cartography/intel/gcp/compute.py +39 -18
  32. cartography/intel/gcp/crm/folders.py +9 -3
  33. cartography/intel/gcp/crm/orgs.py +8 -3
  34. cartography/intel/gcp/crm/projects.py +14 -3
  35. cartography/intel/jamf/computers.py +7 -1
  36. cartography/intel/oci/iam.py +23 -9
  37. cartography/intel/oci/organizations.py +3 -1
  38. cartography/intel/oci/utils.py +28 -5
  39. cartography/intel/okta/awssaml.py +8 -7
  40. cartography/intel/pagerduty/escalation_policies.py +13 -6
  41. cartography/intel/pagerduty/schedules.py +9 -4
  42. cartography/intel/pagerduty/services.py +7 -3
  43. cartography/intel/pagerduty/teams.py +5 -2
  44. cartography/intel/pagerduty/users.py +3 -1
  45. cartography/intel/pagerduty/vendors.py +3 -1
  46. cartography/intel/trivy/__init__.py +109 -58
  47. cartography/models/aws/ec2/networkinterfaces.py +2 -0
  48. cartography/models/aws/ecr/image.py +8 -0
  49. cartography/models/aws/ecr/repository_image.py +1 -1
  50. cartography/sync.py +1 -1
  51. cartography/util.py +5 -1
  52. {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/METADATA +3 -3
  53. {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/RECORD +57 -57
  54. {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/WHEEL +0 -0
  55. {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/entry_points.txt +0 -0
  56. {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/licenses/LICENSE +0 -0
  57. {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ import dateutil.parser
7
7
  import neo4j
8
8
  from pdpyras import APISession
9
9
 
10
+ from cartography.client.core.tx import run_write_query
10
11
  from cartography.util import timeit
11
12
 
12
13
  logger = logging.getLogger(__name__)
@@ -96,7 +97,8 @@ def load_service_data(
96
97
  for team in service["teams"]:
97
98
  team_relations.append({"service": service["id"], "team": team["id"]})
98
99
 
99
- neo4j_session.run(
100
+ run_write_query(
101
+ neo4j_session,
100
102
  ingestion_cypher_query,
101
103
  Services=data,
102
104
  update_tag=update_tag,
@@ -120,7 +122,8 @@ def _attach_teams(
120
122
  MERGE (t)-[r:ASSOCIATED_WITH]->(s)
121
123
  ON CREATE SET r.firstseen = timestamp()
122
124
  """
123
- neo4j_session.run(
125
+ run_write_query(
126
+ neo4j_session,
124
127
  ingestion_cypher_query,
125
128
  Relations=data,
126
129
  update_tag=update_tag,
@@ -162,7 +165,8 @@ def load_integration_data(
162
165
  created_at = dateutil.parser.parse(integration["created_at"])
163
166
  integration["created_at"] = int(created_at.timestamp())
164
167
 
165
- neo4j_session.run(
168
+ run_write_query(
169
+ neo4j_session,
166
170
  ingestion_cypher_query,
167
171
  Integrations=data,
168
172
  update_tag=update_tag,
@@ -6,6 +6,7 @@ from typing import List
6
6
  import neo4j
7
7
  from pdpyras import APISession
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import timeit
10
11
 
11
12
  logger = logging.getLogger(__name__)
@@ -68,7 +69,8 @@ def load_team_data(
68
69
  """
69
70
  logger.info(f"Loading {len(data)} pagerduty teams.")
70
71
 
71
- neo4j_session.run(
72
+ run_write_query(
73
+ neo4j_session,
72
74
  ingestion_cypher_query,
73
75
  Teams=data,
74
76
  update_tag=update_tag,
@@ -90,7 +92,8 @@ def load_team_relations(
90
92
  ON CREATE SET r.firstseen = timestamp()
91
93
  SET r.role = relation.role
92
94
  """
93
- neo4j_session.run(
95
+ run_write_query(
96
+ neo4j_session,
94
97
  ingestion_cypher_query,
95
98
  Relations=data,
96
99
  update_tag=update_tag,
@@ -6,6 +6,7 @@ from typing import List
6
6
  import neo4j
7
7
  from pdpyras import APISession
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import timeit
10
11
 
11
12
  logger = logging.getLogger(__name__)
@@ -57,7 +58,8 @@ def load_user_data(
57
58
  """
58
59
  logger.info(f"Loading {len(data)} pagerduty users.")
59
60
 
60
- neo4j_session.run(
61
+ run_write_query(
62
+ neo4j_session,
61
63
  ingestion_cypher_query,
62
64
  Users=data,
63
65
  update_tag=update_tag,
@@ -6,6 +6,7 @@ from typing import List
6
6
  import neo4j
7
7
  from pdpyras import APISession
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import timeit
10
11
 
11
12
  logger = logging.getLogger(__name__)
@@ -53,7 +54,8 @@ def load_vendor_data(
53
54
  """
54
55
  logger.info(f"Loading {len(data)} pagerduty vendors.")
55
56
 
56
- neo4j_session.run(
57
+ run_write_query(
58
+ neo4j_session,
57
59
  ingestion_cypher_query,
58
60
  Vendors=data,
59
61
  update_tag=update_tag,
@@ -11,8 +11,7 @@ from cartography.config import Config
11
11
  from cartography.intel.trivy.scanner import cleanup
12
12
  from cartography.intel.trivy.scanner import get_json_files_in_dir
13
13
  from cartography.intel.trivy.scanner import get_json_files_in_s3
14
- from cartography.intel.trivy.scanner import sync_single_image_from_file
15
- from cartography.intel.trivy.scanner import sync_single_image_from_s3
14
+ from cartography.intel.trivy.scanner import sync_single_image
16
15
  from cartography.stats import get_stats_client
17
16
  from cartography.util import timeit
18
17
 
@@ -20,53 +19,93 @@ logger = logging.getLogger(__name__)
20
19
  stat_handler = get_stats_client("trivy.scanner")
21
20
 
22
21
 
23
- @timeit
24
- def get_scan_targets(
22
+ def _get_scan_targets_and_aliases(
25
23
  neo4j_session: Session,
26
24
  account_ids: list[str] | None = None,
27
- ) -> set[str]:
25
+ ) -> tuple[set[str], dict[str, str]]:
28
26
  """
29
- Return list of ECR images from all accounts in the graph.
27
+ Return tag URIs and a mapping of digest-qualified URIs to tag URIs.
30
28
  """
31
29
  if not account_ids:
32
30
  aws_accounts = list_accounts(neo4j_session)
33
31
  else:
34
32
  aws_accounts = account_ids
35
33
 
36
- ecr_images: set[str] = set()
34
+ image_uris: set[str] = set()
35
+ digest_aliases: dict[str, str] = {}
36
+
37
37
  for account_id in aws_accounts:
38
- for _, _, image_uri, _, _ in get_ecr_images(neo4j_session, account_id):
39
- ecr_images.add(image_uri)
38
+ for _, _, image_uri, _, digest in get_ecr_images(neo4j_session, account_id):
39
+ if not image_uri:
40
+ continue
41
+ image_uris.add(image_uri)
42
+ if digest:
43
+ # repo URI is everything before the trailing ":" (if present)
44
+ repo_uri = image_uri.rsplit(":", 1)[0]
45
+ digest_uri = f"{repo_uri}@{digest}"
46
+ digest_aliases[digest_uri] = image_uri
40
47
 
41
- return ecr_images
48
+ return image_uris, digest_aliases
42
49
 
43
50
 
44
- def _get_intersection(
45
- image_uris: set[str], json_files: set[str], trivy_s3_prefix: str
46
- ) -> list[tuple[str, str]]:
51
+ @timeit
52
+ def get_scan_targets(
53
+ neo4j_session: Session,
54
+ account_ids: list[str] | None = None,
55
+ ) -> set[str]:
56
+ """
57
+ Return list of ECR images from all accounts in the graph.
47
58
  """
48
- Get the intersection of ECR images in the graph and S3 scan results.
59
+ image_uris, _ = _get_scan_targets_and_aliases(neo4j_session, account_ids)
60
+ return image_uris
49
61
 
50
- Args:
51
- image_uris: Set of ECR images in the graph
52
- json_files: Set of S3 object keys for JSON files
53
- trivy_s3_prefix: S3 prefix path containing scan results
54
62
 
55
- Returns:
56
- List of tuples (image_uri, s3_object_key)
63
+ def _prepare_trivy_data(
64
+ trivy_data: dict[str, Any],
65
+ image_uris: set[str],
66
+ digest_aliases: dict[str, str],
67
+ source: str,
68
+ ) -> tuple[dict[str, Any], str] | None:
57
69
  """
58
- intersection = []
59
- prefix_len = len(trivy_s3_prefix)
60
- for s3_object_key in json_files:
61
- # Sample key "123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
62
- # Sample key "folder/derp/123456789012.dkr.ecr.us-west-2.amazonaws.com/other-repo:v1.0.json"
63
- # Remove the prefix and the .json suffix
64
- image_uri = s3_object_key[prefix_len:-5]
70
+ Determine the tag URI that corresponds to this Trivy payload.
71
+
72
+ Returns (trivy_data, display_uri) if the payload can be linked to an image present
73
+ in the graph; otherwise returns None so the caller can skip ingestion.
74
+ """
75
+
76
+ artifact_name = (trivy_data.get("ArtifactName") or "").strip()
77
+ metadata = trivy_data.get("Metadata") or {}
78
+ candidates: list[str] = []
79
+
80
+ if artifact_name:
81
+ candidates.append(artifact_name)
65
82
 
66
- if image_uri in image_uris:
67
- intersection.append((image_uri, s3_object_key))
83
+ repo_tags = metadata.get("RepoTags", [])
84
+ repo_digests = metadata.get("RepoDigests", [])
85
+ stripped_tags_digests = [item.strip() for item in repo_tags + repo_digests]
86
+ candidates.extend(stripped_tags_digests)
68
87
 
69
- return intersection
88
+ display_uri: str | None = None
89
+
90
+ for candidate in candidates:
91
+ if not candidate:
92
+ continue
93
+ if candidate in image_uris:
94
+ display_uri = candidate
95
+ break
96
+ alias = digest_aliases.get(candidate)
97
+ if alias:
98
+ display_uri = alias
99
+ break
100
+
101
+ if not display_uri:
102
+ logger.debug(
103
+ "Skipping Trivy results for %s because no matching image URI was found in the graph",
104
+ source,
105
+ )
106
+ return None
107
+
108
+ return trivy_data, display_uri
70
109
 
71
110
 
72
111
  @timeit
@@ -93,15 +132,12 @@ def sync_trivy_aws_ecr_from_s3(
93
132
  f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
94
133
  )
95
134
 
96
- image_uris: set[str] = get_scan_targets(neo4j_session)
135
+ image_uris, digest_aliases = _get_scan_targets_and_aliases(neo4j_session)
97
136
  json_files: set[str] = get_json_files_in_s3(
98
137
  trivy_s3_bucket, trivy_s3_prefix, boto3_session
99
138
  )
100
- intersection: list[tuple[str, str]] = _get_intersection(
101
- image_uris, json_files, trivy_s3_prefix
102
- )
103
139
 
104
- if len(intersection) == 0:
140
+ if len(json_files) == 0:
105
141
  logger.error(
106
142
  f"Trivy sync was configured, but there are no ECR images with S3 json scan results in bucket "
107
143
  f"'{trivy_s3_bucket}' with prefix '{trivy_s3_prefix}'. "
@@ -110,18 +146,33 @@ def sync_trivy_aws_ecr_from_s3(
110
146
  f"`<image_uri>.json` and to be in the same bucket and prefix as the scan results. If the prefix is "
111
147
  "a folder, it MUST end with a trailing slash '/'. "
112
148
  )
113
- logger.error(f"JSON files in S3: {json_files}")
114
149
  raise ValueError("No ECR images with S3 json scan results found.")
115
150
 
116
- logger.info(f"Processing {len(intersection)} ECR images with S3 scan results")
117
- for image_uri, s3_object_key in intersection:
118
- sync_single_image_from_s3(
151
+ logger.info(f"Processing {len(json_files)} Trivy result files from S3")
152
+ s3_client = boto3_session.client("s3")
153
+ for s3_object_key in json_files:
154
+ logger.debug(
155
+ f"Reading scan results from S3: s3://{trivy_s3_bucket}/{s3_object_key}"
156
+ )
157
+ response = s3_client.get_object(Bucket=trivy_s3_bucket, Key=s3_object_key)
158
+ scan_data_json = response["Body"].read().decode("utf-8")
159
+ trivy_data = json.loads(scan_data_json)
160
+
161
+ prepared = _prepare_trivy_data(
162
+ trivy_data,
163
+ image_uris=image_uris,
164
+ digest_aliases=digest_aliases,
165
+ source=f"s3://{trivy_s3_bucket}/{s3_object_key}",
166
+ )
167
+ if prepared is None:
168
+ continue
169
+
170
+ prepared_data, display_uri = prepared
171
+ sync_single_image(
119
172
  neo4j_session,
120
- image_uri,
173
+ prepared_data,
174
+ display_uri,
121
175
  update_tag,
122
- trivy_s3_bucket,
123
- s3_object_key,
124
- boto3_session,
125
176
  )
126
177
 
127
178
  cleanup(neo4j_session, common_job_parameters)
@@ -137,7 +188,7 @@ def sync_trivy_aws_ecr_from_dir(
137
188
  """Sync Trivy scan results from local files for AWS ECR images."""
138
189
  logger.info(f"Using Trivy scan results from {results_dir}")
139
190
 
140
- image_uris: set[str] = get_scan_targets(neo4j_session)
191
+ image_uris, digest_aliases = _get_scan_targets_and_aliases(neo4j_session)
141
192
  json_files: set[str] = get_json_files_in_dir(results_dir)
142
193
 
143
194
  if not json_files:
@@ -149,27 +200,27 @@ def sync_trivy_aws_ecr_from_dir(
149
200
  logger.info(f"Processing {len(json_files)} local Trivy result files")
150
201
 
151
202
  for file_path in json_files:
152
- # First, check if the image exists in the graph before syncing
153
203
  try:
154
- # Peek at the artifact name without processing the file
155
204
  with open(file_path, encoding="utf-8") as f:
156
205
  trivy_data = json.load(f)
157
- artifact_name = trivy_data.get("ArtifactName")
158
-
159
- if artifact_name and artifact_name not in image_uris:
160
- logger.debug(
161
- f"Skipping results for {artifact_name} since the image is not present in the graph"
162
- )
163
- continue
206
+ except json.JSONDecodeError as e:
207
+ logger.error(f"Failed to read Trivy data from {file_path}: {e}")
208
+ continue
164
209
 
165
- except (json.JSONDecodeError, KeyError) as e:
166
- logger.error(f"Failed to read artifact name from {file_path}: {e}")
210
+ prepared = _prepare_trivy_data(
211
+ trivy_data,
212
+ image_uris=image_uris,
213
+ digest_aliases=digest_aliases,
214
+ source=file_path,
215
+ )
216
+ if prepared is None:
167
217
  continue
168
218
 
169
- # Now sync the file since we know the image exists in the graph
170
- sync_single_image_from_file(
219
+ prepared_data, display_uri = prepared
220
+ sync_single_image(
171
221
  neo4j_session,
172
- file_path,
222
+ prepared_data,
223
+ display_uri,
173
224
  update_tag,
174
225
  )
175
226
 
@@ -47,6 +47,8 @@ class EC2NetworkInterfaceNodeProperties(CartographyNodeProperties):
47
47
  # TODO: remove subnetid once we have migrated to subnet_id
48
48
  subnetid: PropertyRef = PropertyRef("SubnetId", extra_index=True)
49
49
  subnet_id: PropertyRef = PropertyRef("SubnetId", extra_index=True)
50
+ attach_time: PropertyRef = PropertyRef("AttachTime")
51
+ device_index: PropertyRef = PropertyRef("DeviceIndex")
50
52
 
51
53
 
52
54
  @dataclass(frozen=True)
@@ -18,6 +18,14 @@ class ECRImageNodeProperties(CartographyNodeProperties):
18
18
  region: PropertyRef = PropertyRef("Region", set_in_kwargs=True)
19
19
  lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
20
20
  layer_diff_ids: PropertyRef = PropertyRef("layer_diff_ids")
21
+ type: PropertyRef = PropertyRef("type")
22
+ architecture: PropertyRef = PropertyRef("architecture")
23
+ os: PropertyRef = PropertyRef("os")
24
+ variant: PropertyRef = PropertyRef("variant")
25
+ attestation_type: PropertyRef = PropertyRef("attestation_type")
26
+ attests_digest: PropertyRef = PropertyRef("attests_digest")
27
+ media_type: PropertyRef = PropertyRef("media_type")
28
+ artifact_media_type: PropertyRef = PropertyRef("artifact_media_type")
21
29
 
22
30
 
23
31
  @dataclass(frozen=True)
@@ -71,7 +71,7 @@ class ECRRepositoryImageToECRImageRelProperties(CartographyRelProperties):
71
71
  class ECRRepositoryImageToECRImageRel(CartographyRelSchema):
72
72
  target_node_label: str = "ECRImage"
73
73
  target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
74
- {"id": PropertyRef("imageDigest")}
74
+ {"id": PropertyRef("imageDigests", one_to_many=True)}
75
75
  )
76
76
  direction: LinkDirection = LinkDirection.OUTWARD
77
77
  rel_label: str = "IMAGE"
cartography/sync.py CHANGED
@@ -52,7 +52,7 @@ from cartography.util import STATUS_SUCCESS
52
52
  logger = logging.getLogger(__name__)
53
53
 
54
54
 
55
- TOP_LEVEL_MODULES = OrderedDict(
55
+ TOP_LEVEL_MODULES: OrderedDict[str, Callable[..., None]] = OrderedDict(
56
56
  { # preserve order so that the default sync always runs `analysis` at the very end
57
57
  "create-indexes": cartography.intel.create_indexes.run,
58
58
  "airbyte": cartography.intel.airbyte.start_airbyte_ingestion,
cartography/util.py CHANGED
@@ -153,6 +153,9 @@ def merge_module_sync_metadata(
153
153
  :param synced_type: The sub-module's type
154
154
  :param update_tag: Timestamp used to determine data freshness
155
155
  """
156
+ # Import here to avoid circular import with cartography.client.core.tx
157
+ from cartography.client.core.tx import run_write_query
158
+
156
159
  template = Template(
157
160
  """
158
161
  MERGE (n:ModuleSyncMetadata{id:'${group_type}_${group_id}_${synced_type}'})
@@ -164,7 +167,8 @@ def merge_module_sync_metadata(
164
167
  n.lastupdated=$UPDATE_TAG
165
168
  """,
166
169
  )
167
- neo4j_session.run(
170
+ run_write_query(
171
+ neo4j_session,
168
172
  template.safe_substitute(
169
173
  group_type=group_type,
170
174
  group_id=group_id,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cartography
3
- Version: 0.117.0
3
+ Version: 0.118.0
4
4
  Summary: Explore assets and their relationships across your technical infrastructure.
5
5
  Maintainer: Cartography Contributors
6
6
  License-Expression: Apache-2.0
@@ -59,7 +59,7 @@ Requires-Dist: python-dateutil
59
59
  Requires-Dist: xmltodict
60
60
  Requires-Dist: duo-client
61
61
  Requires-Dist: cloudflare<5.0.0,>=4.1.0
62
- Requires-Dist: scaleway>=2.9.0
62
+ Requires-Dist: scaleway>=2.10.0
63
63
  Requires-Dist: google-cloud-resource-manager>=1.14.2
64
64
  Requires-Dist: types-aiobotocore-ecr
65
65
  Requires-Dist: typer>=0.9.0
@@ -89,7 +89,7 @@ You can learn more about the story behind Cartography in our [presentation at BS
89
89
 
90
90
  ## Supported platforms
91
91
  - [Airbyte](https://cartography-cncf.github.io/cartography/modules/airbyte/index.html) - Organization, Workspace, User, Source, Destination, Connection, Tag, Stream
92
- - [Amazon Web Services](https://cartography-cncf.github.io/cartography/modules/aws/index.html) - ACM, API Gateway, CloudWatch, CodeBuild, Config, Cognito, EC2, ECS, ECR (including image layers), EFS, Elasticsearch, Elastic Kubernetes Service (EKS), DynamoDB, Glue, GuardDuty, IAM, Inspector, KMS, Lambda, RDS, Redshift, Route53, S3, Secrets Manager(Secret Versions), Security Hub, SNS, SQS, SSM, STS, Tags
92
+ - [Amazon Web Services](https://cartography-cncf.github.io/cartography/modules/aws/index.html) - ACM, API Gateway, CloudWatch, CodeBuild, Config, Cognito, EC2, ECS, ECR (including multi-arch images, image layers, and attestations), EFS, Elasticsearch, Elastic Kubernetes Service (EKS), DynamoDB, Glue, GuardDuty, IAM, Inspector, KMS, Lambda, RDS, Redshift, Route53, S3, Secrets Manager(Secret Versions), Security Hub, SNS, SQS, SSM, STS, Tags
93
93
  - [Anthropic](https://cartography-cncf.github.io/cartography/modules/anthropic/index.html) - Organization, ApiKey, User, Workspace
94
94
  - [BigFix](https://cartography-cncf.github.io/cartography/modules/bigfix/index.html) - Computers
95
95
  - [Cloudflare](https://cartography-cncf.github.io/cartography/modules/cloudflare/index.html) - Account, Role, Member, Zone, DNSRecord