cartography-0.116.1-py3-none-any.whl → cartography-0.118.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (70)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +11 -0
  3. cartography/client/core/tx.py +23 -2
  4. cartography/config.py +5 -0
  5. cartography/graph/job.py +6 -2
  6. cartography/graph/statement.py +4 -0
  7. cartography/intel/aws/__init__.py +1 -0
  8. cartography/intel/aws/apigateway.py +18 -5
  9. cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
  10. cartography/intel/aws/ec2/internet_gateways.py +4 -2
  11. cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
  12. cartography/intel/aws/ec2/network_interfaces.py +4 -0
  13. cartography/intel/aws/ec2/reserved_instances.py +3 -1
  14. cartography/intel/aws/ec2/tgw.py +11 -5
  15. cartography/intel/aws/ec2/volumes.py +1 -1
  16. cartography/intel/aws/ecr.py +202 -26
  17. cartography/intel/aws/ecr_image_layers.py +174 -21
  18. cartography/intel/aws/elasticsearch.py +13 -4
  19. cartography/intel/aws/identitycenter.py +93 -54
  20. cartography/intel/aws/inspector.py +26 -14
  21. cartography/intel/aws/permission_relationships.py +3 -3
  22. cartography/intel/aws/s3.py +26 -13
  23. cartography/intel/aws/ssm.py +3 -5
  24. cartography/intel/azure/__init__.py +16 -0
  25. cartography/intel/azure/compute.py +9 -4
  26. cartography/intel/azure/container_instances.py +95 -0
  27. cartography/intel/azure/cosmosdb.py +31 -15
  28. cartography/intel/azure/data_lake.py +124 -0
  29. cartography/intel/azure/sql.py +25 -12
  30. cartography/intel/azure/storage.py +19 -9
  31. cartography/intel/azure/subscription.py +3 -1
  32. cartography/intel/crowdstrike/spotlight.py +5 -2
  33. cartography/intel/entra/app_role_assignments.py +9 -2
  34. cartography/intel/gcp/__init__.py +26 -9
  35. cartography/intel/gcp/clients.py +8 -4
  36. cartography/intel/gcp/compute.py +39 -18
  37. cartography/intel/gcp/crm/folders.py +9 -3
  38. cartography/intel/gcp/crm/orgs.py +8 -3
  39. cartography/intel/gcp/crm/projects.py +14 -3
  40. cartography/intel/github/teams.py +3 -3
  41. cartography/intel/jamf/computers.py +7 -1
  42. cartography/intel/oci/iam.py +23 -9
  43. cartography/intel/oci/organizations.py +3 -1
  44. cartography/intel/oci/utils.py +28 -5
  45. cartography/intel/okta/awssaml.py +8 -7
  46. cartography/intel/pagerduty/escalation_policies.py +13 -6
  47. cartography/intel/pagerduty/schedules.py +9 -4
  48. cartography/intel/pagerduty/services.py +7 -3
  49. cartography/intel/pagerduty/teams.py +5 -2
  50. cartography/intel/pagerduty/users.py +3 -1
  51. cartography/intel/pagerduty/vendors.py +3 -1
  52. cartography/intel/trivy/__init__.py +109 -58
  53. cartography/models/aws/ec2/networkinterfaces.py +2 -0
  54. cartography/models/aws/ecr/image.py +38 -1
  55. cartography/models/aws/ecr/repository_image.py +1 -1
  56. cartography/models/azure/container_instance.py +55 -0
  57. cartography/models/azure/data_lake_filesystem.py +51 -0
  58. cartography/rules/cli.py +8 -6
  59. cartography/rules/data/frameworks/mitre_attack/__init__.py +7 -1
  60. cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +317 -0
  61. cartography/rules/data/frameworks/mitre_attack/requirements/t1190_exploit_public_facing_application/__init__.py +1 -0
  62. cartography/rules/spec/model.py +13 -0
  63. cartography/sync.py +1 -1
  64. cartography/util.py +5 -1
  65. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/METADATA +5 -4
  66. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/RECORD +70 -65
  67. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/WHEEL +0 -0
  68. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/entry_points.txt +0 -0
  69. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/licenses/LICENSE +0 -0
  70. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.116.1'
32
- __version_tuple__ = version_tuple = (0, 116, 1)
31
+ __version__ = version = '0.118.0'
32
+ __version_tuple__ = version_tuple = (0, 118, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
cartography/cli.py CHANGED
@@ -279,6 +279,17 @@ class CLI:
279
279
  "Example: 'HIGH' will sync only HIGH and CRITICAL findings, filtering out LOW and MEDIUM severity findings."
280
280
  ),
281
281
  )
282
+ parser.add_argument(
283
+ "--experimental-aws-inspector-batch",
284
+ type=int,
285
+ default=1000,
286
+ help=(
287
+ "EXPERIMENTAL: This feature is experimental and may be removed in the future. "
288
+ "Batch size for AWS Inspector findings sync. Controls how many findings are fetched, processed and cleaned up at a time. "
289
+ "Default is 1000. Increase this value if you have a large number of findings and want to reduce API calls, "
290
+ "or decrease it if you're experiencing memory issues."
291
+ ),
292
+ )
282
293
  parser.add_argument(
283
294
  "--analysis-job-directory",
284
295
  type=str,
cartography/client/core/tx.py CHANGED
@@ -6,6 +6,7 @@ from typing import Optional
6
6
  from typing import Tuple
7
7
  from typing import Union
8
8
 
9
+ import backoff
9
10
  import neo4j
10
11
 
11
12
  from cartography.graph.querybuilder import build_create_index_queries
@@ -14,11 +15,31 @@ from cartography.graph.querybuilder import build_ingestion_query
14
15
  from cartography.graph.querybuilder import build_matchlink_query
15
16
  from cartography.models.core.nodes import CartographyNodeSchema
16
17
  from cartography.models.core.relationships import CartographyRelSchema
18
+ from cartography.util import backoff_handler
17
19
  from cartography.util import batch
18
20
 
19
21
  logger = logging.getLogger(__name__)
20
22
 
21
23
 
24
+ @backoff.on_exception( # type: ignore
25
+ backoff.expo,
26
+ (
27
+ ConnectionResetError,
28
+ neo4j.exceptions.ServiceUnavailable,
29
+ neo4j.exceptions.SessionExpired,
30
+ neo4j.exceptions.TransientError,
31
+ ),
32
+ max_tries=5,
33
+ on_backoff=backoff_handler,
34
+ )
35
+ def _run_index_query_with_retry(neo4j_session: neo4j.Session, query: str) -> None:
36
+ """
37
+ Execute an index creation query with retry logic.
38
+ Index creation requires autocommit transactions and can experience transient errors.
39
+ """
40
+ neo4j_session.run(query)
41
+
42
+
22
43
  def run_write_query(
23
44
  neo4j_session: neo4j.Session, query: str, **parameters: Any
24
45
  ) -> None:
@@ -269,7 +290,7 @@ def ensure_indexes(
269
290
  raise ValueError(
270
291
  'Query provided to `ensure_indexes()` does not start with "CREATE INDEX IF NOT EXISTS".',
271
292
  )
272
- neo4j_session.run(query)
293
+ _run_index_query_with_retry(neo4j_session, query)
273
294
 
274
295
 
275
296
  def ensure_indexes_for_matchlinks(
@@ -288,7 +309,7 @@ def ensure_indexes_for_matchlinks(
288
309
  raise ValueError(
289
310
  'Query provided to `ensure_indexes_for_matchlinks()` does not start with "CREATE INDEX IF NOT EXISTS".',
290
311
  )
291
- neo4j_session.run(query)
312
+ _run_index_query_with_retry(neo4j_session, query)
292
313
 
293
314
 
294
315
  def load(
cartography/config.py CHANGED
@@ -58,6 +58,9 @@ class Config:
58
58
  :type aws_guardduty_severity_threshold: str
59
59
  :param aws_guardduty_severity_threshold: GuardDuty severity threshold filter. Only findings at or above this
60
60
  severity level will be synced. Valid values: LOW, MEDIUM, HIGH, CRITICAL. Optional.
61
+ :type experimental_aws_inspector_batch: int
62
+ :param experimental_aws_inspector_batch: EXPERIMENTAL: Batch size for AWS Inspector findings sync. Controls how
63
+ many findings are fetched, processed and cleaned up at a time. Default is 1000. Optional.
61
64
  :type analysis_job_directory: str
62
65
  :param analysis_job_directory: Path to a directory tree containing analysis jobs to run. Optional.
63
66
  :type oci_sync_all_profiles: bool
@@ -195,6 +198,7 @@ class Config:
195
198
  aws_regions=None,
196
199
  aws_best_effort_mode=False,
197
200
  aws_cloudtrail_management_events_lookback_hours=None,
201
+ experimental_aws_inspector_batch=1000,
198
202
  azure_sync_all_subscriptions=False,
199
203
  azure_sp_auth=None,
200
204
  azure_tenant_id=None,
@@ -287,6 +291,7 @@ class Config:
287
291
  self.aws_cloudtrail_management_events_lookback_hours = (
288
292
  aws_cloudtrail_management_events_lookback_hours
289
293
  )
294
+ self.experimental_aws_inspector_batch = experimental_aws_inspector_batch
290
295
  self.azure_sync_all_subscriptions = azure_sync_all_subscriptions
291
296
  self.azure_sp_auth = azure_sp_auth
292
297
  self.azure_tenant_id = azure_tenant_id
cartography/graph/job.py CHANGED
@@ -139,11 +139,13 @@ class GraphJob:
139
139
  cls,
140
140
  node_schema: CartographyNodeSchema,
141
141
  parameters: Dict[str, Any],
142
+ iterationsize: int = 100,
142
143
  ) -> "GraphJob":
143
144
  """
144
145
  Create a cleanup job from a CartographyNodeSchema object.
145
146
  For a given node, the fields used in the node_schema.sub_resource_relationship.target_node_node_matcher.keys()
146
147
  must be provided as keys and values in the params dict.
148
+ :param iterationsize: The number of items to process in each iteration. Defaults to 100.
147
149
  """
148
150
  queries: List[str] = build_cleanup_queries(node_schema)
149
151
 
@@ -165,7 +167,7 @@ class GraphJob:
165
167
  query,
166
168
  parameters=parameters,
167
169
  iterative=True,
168
- iterationsize=100,
170
+ iterationsize=iterationsize,
169
171
  parent_job_name=node_schema.label,
170
172
  parent_job_sequence_num=idx,
171
173
  )
@@ -185,6 +187,7 @@ class GraphJob:
185
187
  sub_resource_label: str,
186
188
  sub_resource_id: str,
187
189
  update_tag: int,
190
+ iterationsize: int = 100,
188
191
  ) -> "GraphJob":
189
192
  """
190
193
  Create a cleanup job from a CartographyRelSchema object (specifically, a MatchLink).
@@ -194,6 +197,7 @@ class GraphJob:
194
197
  - For a given rel_schema, the fields used in the rel_schema.properties._sub_resource_label.name and
195
198
  rel_schema.properties._sub_resource_id.name must be provided as keys and values in the params dict.
196
199
  - The rel_schema must have a source_node_matcher and target_node_matcher.
200
+ :param iterationsize: The number of items to process in each iteration. Defaults to 100.
197
201
  """
198
202
  cleanup_link_query = build_cleanup_query_for_matchlink(rel_schema)
199
203
  logger.debug(f"Cleanup query: {cleanup_link_query}")
@@ -208,7 +212,7 @@ class GraphJob:
208
212
  cleanup_link_query,
209
213
  parameters=parameters,
210
214
  iterative=True,
211
- iterationsize=100,
215
+ iterationsize=iterationsize,
212
216
  parent_job_name=rel_schema.rel_label,
213
217
  )
214
218
 
cartography/graph/statement.py CHANGED
@@ -52,6 +52,10 @@ class GraphStatement:
52
52
  self.parameters = parameters or {}
53
53
  self.iterative = iterative
54
54
  self.iterationsize = iterationsize
55
+ if iterationsize < 0:
56
+ raise ValueError(
57
+ f"iterationsize must be a positive integer, got {iterationsize}",
58
+ )
55
59
  self.parameters["LIMIT_SIZE"] = self.iterationsize
56
60
 
57
61
  self.parent_job_name = parent_job_name if parent_job_name else None
cartography/intel/aws/__init__.py CHANGED
@@ -312,6 +312,7 @@ def start_aws_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
312
312
  "permission_relationships_file": config.permission_relationships_file,
313
313
  "aws_guardduty_severity_threshold": config.aws_guardduty_severity_threshold,
314
314
  "aws_cloudtrail_management_events_lookback_hours": config.aws_cloudtrail_management_events_lookback_hours,
315
+ "experimental_aws_inspector_batch": config.experimental_aws_inspector_batch,
315
316
  }
316
317
  try:
317
318
  boto3_session = boto3.Session()
cartography/intel/aws/apigateway.py CHANGED
@@ -178,11 +178,24 @@ def get_rest_api_resources_methods_integrations(
178
178
  method["apiId"] = api["id"]
179
179
  method["httpMethod"] = http_method
180
180
  methods.append(method)
181
- integration = client.get_integration(
182
- restApiId=api["id"],
183
- resourceId=resource_id,
184
- httpMethod=http_method,
185
- )
181
+ try:
182
+ integration = client.get_integration(
183
+ restApiId=api["id"],
184
+ resourceId=resource_id,
185
+ httpMethod=http_method,
186
+ )
187
+ except ClientError as e:
188
+ error_code = e.response.get("Error", {}).get("Code")
189
+ if error_code == "NotFoundException":
190
+ logger.warning(
191
+ "No integration found for API %s resource %s method %s: %s",
192
+ api["id"],
193
+ resource_id,
194
+ http_method,
195
+ e,
196
+ )
197
+ continue
198
+ raise
186
199
  integration["resourceId"] = resource_id
187
200
  integration["apiId"] = api["id"]
188
201
  integration["integrationHttpMethod"] = integration.get("httpMethod")
cartography/intel/aws/ec2/elastic_ip_addresses.py CHANGED
@@ -6,6 +6,7 @@ import boto3
6
6
  import neo4j
7
7
  from botocore.exceptions import ClientError
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import aws_handle_regions
10
11
  from cartography.util import run_cleanup_job
11
12
  from cartography.util import timeit
@@ -83,7 +84,8 @@ def load_elastic_ip_addresses(
83
84
  SET r.lastupdated = $update_tag
84
85
  """
85
86
 
86
- neo4j_session.run(
87
+ run_write_query(
88
+ neo4j_session,
87
89
  ingest_addresses,
88
90
  elastic_ip_addresses=elastic_ip_addresses,
89
91
  Region=region,
cartography/intel/aws/ec2/internet_gateways.py CHANGED
@@ -5,6 +5,7 @@ from typing import List
5
5
  import boto3
6
6
  import neo4j
7
7
 
8
+ from cartography.client.core.tx import run_write_query
8
9
  from cartography.util import aws_handle_regions
9
10
  from cartography.util import run_cleanup_job
10
11
  from cartography.util import timeit
@@ -63,13 +64,14 @@ def load_internet_gateways(
63
64
  SET r.lastupdated = $aws_update_tag
64
65
  """
65
66
 
66
- neo4j_session.run(
67
+ run_write_query(
68
+ neo4j_session,
67
69
  query,
68
70
  internet_gateways=internet_gateways,
69
71
  region=region,
70
72
  aws_account_id=current_aws_account_id,
71
73
  aws_update_tag=update_tag,
72
- ).consume()
74
+ )
73
75
 
74
76
 
75
77
  @timeit
cartography/intel/aws/ec2/load_balancer_v2s.py CHANGED
@@ -6,6 +6,7 @@ import boto3
6
6
  import botocore
7
7
  import neo4j
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import aws_handle_regions
10
11
  from cartography.util import run_cleanup_job
11
12
  from cartography.util import timeit
@@ -104,7 +105,8 @@ def load_load_balancer_v2s(
104
105
  logger.warning("Skipping load balancer entry with missing DNSName: %r", lb)
105
106
  continue
106
107
 
107
- neo4j_session.run(
108
+ run_write_query(
109
+ neo4j_session,
108
110
  ingest_load_balancer_v2,
109
111
  ID=load_balancer_id,
110
112
  CREATED_TIME=str(lb["CreatedTime"]),
@@ -138,7 +140,8 @@ def load_load_balancer_v2s(
138
140
  SET r.lastupdated = $update_tag
139
141
  """
140
142
  for group in lb["SecurityGroups"]:
141
- neo4j_session.run(
143
+ run_write_query(
144
+ neo4j_session,
142
145
  ingest_load_balancer_v2_security_group,
143
146
  ID=load_balancer_id,
144
147
  GROUP_ID=str(group),
@@ -182,7 +185,8 @@ def load_load_balancer_v2_subnets(
182
185
  SET r.lastupdated = $update_tag
183
186
  """
184
187
  for az in az_data:
185
- neo4j_session.run(
188
+ run_write_query(
189
+ neo4j_session,
186
190
  ingest_load_balancer_subnet,
187
191
  ID=load_balancer_id,
188
192
  SubnetId=az["SubnetId"],
@@ -219,7 +223,8 @@ def load_load_balancer_v2_target_groups(
219
223
  continue
220
224
 
221
225
  for instance in target_group["Targets"]:
222
- neo4j_session.run(
226
+ run_write_query(
227
+ neo4j_session,
223
228
  ingest_instances,
224
229
  ID=load_balancer_id,
225
230
  INSTANCE_ID=instance,
@@ -253,7 +258,8 @@ def load_load_balancer_v2_listeners(
253
258
  ON CREATE SET r.firstseen = timestamp()
254
259
  SET r.lastupdated = $update_tag
255
260
  """
256
- neo4j_session.run(
261
+ run_write_query(
262
+ neo4j_session,
257
263
  ingest_listener,
258
264
  LoadBalancerId=load_balancer_id,
259
265
  Listeners=listener_data,
cartography/intel/aws/ec2/network_interfaces.py CHANGED
@@ -98,6 +98,10 @@ def transform_network_interface_data(
98
98
  "SourceDestCheck": network_interface["SourceDestCheck"],
99
99
  "Status": network_interface["Status"],
100
100
  "SubnetId": network_interface["SubnetId"],
101
+ "AttachTime": network_interface.get("Attachment", {}).get("AttachTime"),
102
+ "DeviceIndex": network_interface.get("Attachment", {}).get(
103
+ "DeviceIndex"
104
+ ),
101
105
  "ElbV1Id": elb_v1_id,
102
106
  "ElbV2Id": elb_v2_id,
103
107
  },
cartography/intel/aws/ec2/reserved_instances.py CHANGED
@@ -6,6 +6,7 @@ import boto3
6
6
  import neo4j
7
7
  from botocore.exceptions import ClientError
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import aws_handle_regions
10
11
  from cartography.util import run_cleanup_job
11
12
  from cartography.util import timeit
@@ -64,7 +65,8 @@ def load_reserved_instances(
64
65
  r_instance["Start"] = str(r_instance["Start"])
65
66
  r_instance["End"] = str(r_instance["End"])
66
67
 
67
- neo4j_session.run(
68
+ run_write_query(
69
+ neo4j_session,
68
70
  ingest_reserved_instances,
69
71
  reserved_instances_list=data,
70
72
  AWS_ACCOUNT_ID=current_aws_account_id,
cartography/intel/aws/ec2/tgw.py CHANGED
@@ -6,6 +6,7 @@ import boto3
6
6
  import botocore.exceptions
7
7
  import neo4j
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import aws_handle_regions
10
11
  from cartography.util import run_cleanup_job
11
12
  from cartography.util import timeit
@@ -120,7 +121,8 @@ def load_transit_gateways(
120
121
  for tgw in data:
121
122
  tgw_id = tgw["TransitGatewayId"]
122
123
 
123
- neo4j_session.run(
124
+ run_write_query(
125
+ neo4j_session,
124
126
  ingest_transit_gateway,
125
127
  TgwId=tgw_id,
126
128
  ARN=tgw["TransitGatewayArn"],
@@ -161,7 +163,8 @@ def _attach_shared_transit_gateway(
161
163
  """
162
164
 
163
165
  if tgw["OwnerId"] != current_aws_account_id:
164
- neo4j_session.run(
166
+ run_write_query(
167
+ neo4j_session,
165
168
  attach_tgw,
166
169
  ARN=tgw["TransitGatewayArn"],
167
170
  TransitGatewayId=tgw["TransitGatewayId"],
@@ -202,7 +205,8 @@ def load_tgw_attachments(
202
205
  for tgwa in data:
203
206
  tgwa_id = tgwa["TransitGatewayAttachmentId"]
204
207
 
205
- neo4j_session.run(
208
+ run_write_query(
209
+ neo4j_session,
206
210
  ingest_transit_gateway,
207
211
  TgwAttachmentId=tgwa_id,
208
212
  TransitGatewayId=tgwa["TransitGatewayId"],
@@ -261,7 +265,8 @@ def _attach_tgw_vpc_attachment_to_vpc_subnets(
261
265
  SET p.lastupdated = $update_tag
262
266
  """
263
267
 
264
- neo4j_session.run(
268
+ run_write_query(
269
+ neo4j_session,
265
270
  attach_vpc_tgw_attachment_to_vpc,
266
271
  VpcId=tgw_vpc_attachment["VpcId"],
267
272
  TgwAttachmentId=tgw_vpc_attachment["TransitGatewayAttachmentId"],
@@ -269,7 +274,8 @@ def _attach_tgw_vpc_attachment_to_vpc_subnets(
269
274
  )
270
275
 
271
276
  for subnet_id in tgw_vpc_attachment["SubnetIds"]:
272
- neo4j_session.run(
277
+ run_write_query(
278
+ neo4j_session,
273
279
  attach_vpc_tgw_attachment_to_subnet,
274
280
  SubnetId=subnet_id,
275
281
  TgwAttachmentId=tgw_vpc_attachment["TransitGatewayAttachmentId"],
cartography/intel/aws/ec2/volumes.py CHANGED
@@ -70,7 +70,7 @@ def transform_volumes(
70
70
 
71
71
  for attachment in active_attachments:
72
72
  vol_with_attachment = raw_vol.copy()
73
- vol_with_attachment["InstanceId"] = attachment["InstanceId"]
73
+ vol_with_attachment["InstanceId"] = attachment.get("InstanceId")
74
74
  result.append(vol_with_attachment)
75
75
 
76
76
  return result