cartography 0.107.0rc2__py3-none-any.whl → 0.108.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (40)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +10 -0
  3. cartography/config.py +5 -0
  4. cartography/data/indexes.cypher +0 -8
  5. cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
  6. cartography/intel/aws/__init__.py +1 -0
  7. cartography/intel/aws/cloudwatch.py +77 -0
  8. cartography/intel/aws/ec2/security_groups.py +140 -122
  9. cartography/intel/aws/ec2/snapshots.py +47 -84
  10. cartography/intel/aws/ec2/subnets.py +1 -1
  11. cartography/intel/aws/ecs.py +17 -0
  12. cartography/intel/aws/guardduty.py +275 -0
  13. cartography/intel/aws/resources.py +2 -0
  14. cartography/intel/github/repos.py +370 -28
  15. cartography/intel/sentinelone/__init__.py +8 -2
  16. cartography/intel/sentinelone/application.py +248 -0
  17. cartography/intel/sentinelone/utils.py +20 -1
  18. cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
  19. cartography/models/aws/ec2/networkinterfaces.py +2 -0
  20. cartography/models/aws/ec2/security_group_rules.py +109 -0
  21. cartography/models/aws/ec2/security_groups.py +90 -0
  22. cartography/models/aws/ec2/snapshots.py +58 -0
  23. cartography/models/aws/ec2/subnet_instance.py +2 -0
  24. cartography/models/aws/ec2/subnet_networkinterface.py +2 -0
  25. cartography/models/aws/ec2/volumes.py +20 -0
  26. cartography/models/aws/ecs/tasks.py +24 -1
  27. cartography/models/aws/guardduty/__init__.py +1 -0
  28. cartography/models/aws/guardduty/findings.py +102 -0
  29. cartography/models/github/dependencies.py +74 -0
  30. cartography/models/github/manifests.py +49 -0
  31. cartography/models/sentinelone/application.py +44 -0
  32. cartography/models/sentinelone/application_version.py +96 -0
  33. {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/METADATA +3 -3
  34. {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/RECORD +38 -28
  35. cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
  36. cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
  37. {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/WHEEL +0 -0
  38. {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/entry_points.txt +0 -0
  39. {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/licenses/LICENSE +0 -0
  40. {cartography-0.107.0rc2.dist-info → cartography-0.108.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from typing import Any
2
3
  from typing import Dict
3
4
  from typing import List
4
5
 
@@ -6,8 +7,11 @@ import boto3
6
7
  import neo4j
7
8
  from botocore.exceptions import ClientError
8
9
 
10
+ from cartography.client.core.tx import load
11
+ from cartography.client.core.tx import read_list_of_values_tx
12
+ from cartography.graph.job import GraphJob
13
+ from cartography.models.aws.ec2.snapshots import EBSSnapshotSchema
9
14
  from cartography.util import aws_handle_regions
10
- from cartography.util import run_cleanup_job
11
15
  from cartography.util import timeit
12
16
 
13
17
  logger = logging.getLogger(__name__)
@@ -24,12 +28,13 @@ def get_snapshots_in_use(
24
28
  WHERE v.region = $Region
25
29
  RETURN v.snapshotid as snapshot
26
30
  """
27
- results = neo4j_session.run(
31
+ results = read_list_of_values_tx(
32
+ neo4j_session,
28
33
  query,
29
34
  AWS_ACCOUNT_ID=current_aws_account_id,
30
35
  Region=region,
31
36
  )
32
- return [r["snapshot"] for r in results if r["snapshot"]]
37
+ return [str(snapshot) for snapshot in results if snapshot]
33
38
 
34
39
 
35
40
  @timeit
@@ -45,7 +50,6 @@ def get_snapshots(
45
50
  for page in paginator.paginate(OwnerIds=["self"]):
46
51
  snapshots.extend(page["Snapshots"])
47
52
 
48
- # fetch in-use snapshots not in self_owned snapshots
49
53
  self_owned_snapshot_ids = {s["SnapshotId"] for s in snapshots}
50
54
  other_snapshot_ids = set(in_use_snapshot_ids) - self_owned_snapshot_ids
51
55
  if other_snapshot_ids:
@@ -55,8 +59,7 @@ def get_snapshots(
55
59
  except ClientError as e:
56
60
  if e.response["Error"]["Code"] == "InvalidSnapshot.NotFound":
57
61
  logger.warning(
58
- f"Failed to retrieve page of in-use, \
59
- not owned snapshots. Continuing anyway. Error - {e}",
62
+ f"Failed to retrieve page of in-use, not owned snapshots. Continuing anyway. Error - {e}"
60
63
  )
61
64
  else:
62
65
  raise
@@ -64,93 +67,53 @@ def get_snapshots(
64
67
  return snapshots
65
68
 
66
69
 
70
def transform_snapshots(snapshots: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Project raw EBS snapshot dicts onto a fixed set of keys.

    :param snapshots: Snapshot dicts. Each one must contain "SnapshotId";
        a missing "SnapshotId" raises KeyError.
    :return: One new dict per input snapshot, in input order. Every key other
        than "SnapshotId" is optional and is None when absent from the input.
        Values are passed through unchanged (no type conversion is done here).
    """
    # "SnapshotId" is the only mandatory key; the rest are copied best-effort.
    optional_keys = (
        "Description",
        "Encrypted",
        "Progress",
        "StartTime",
        "State",
        "StateMessage",
        "VolumeId",
        "VolumeSize",
        "OutpostArn",
        "DataEncryptionKeyId",
        "KmsKeyId",
    )
    return [
        {
            "SnapshotId": raw["SnapshotId"],
            **{key: raw.get(key) for key in optional_keys},
        }
        for raw in snapshots
    ]
90
+
91
+
67
92
  @timeit
68
93
  def load_snapshots(
69
94
  neo4j_session: neo4j.Session,
70
- data: List[Dict],
95
+ data: List[Dict[str, Any]],
71
96
  region: str,
72
97
  current_aws_account_id: str,
73
98
  update_tag: int,
74
99
  ) -> None:
75
- ingest_snapshots = """
76
- UNWIND $snapshots_list as snapshot
77
- MERGE (s:EBSSnapshot{id: snapshot.SnapshotId})
78
- ON CREATE SET s.firstseen = timestamp()
79
- SET s.lastupdated = $update_tag, s.description = snapshot.Description, s.encrypted = snapshot.Encrypted,
80
- s.progress = snapshot.Progress, s.starttime = snapshot.StartTime, s.state = snapshot.State,
81
- s.statemessage = snapshot.StateMessage, s.volumeid = snapshot.VolumeId, s.volumesize = snapshot.VolumeSize,
82
- s.outpostarn = snapshot.OutpostArn, s.dataencryptionkeyid = snapshot.DataEncryptionKeyId,
83
- s.kmskeyid = snapshot.KmsKeyId, s.region=$Region
84
- WITH s
85
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
86
- MERGE (aa)-[r:RESOURCE]->(s)
87
- ON CREATE SET r.firstseen = timestamp()
88
- SET r.lastupdated = $update_tag
89
- """
90
-
91
- for snapshot in data:
92
- snapshot["StartTime"] = str(snapshot["StartTime"])
93
-
94
- neo4j_session.run(
95
- ingest_snapshots,
96
- snapshots_list=data,
97
- AWS_ACCOUNT_ID=current_aws_account_id,
100
+ load(
101
+ neo4j_session,
102
+ EBSSnapshotSchema(),
103
+ data,
104
+ lastupdated=update_tag,
98
105
  Region=region,
99
- update_tag=update_tag,
100
- )
101
-
102
-
103
- @timeit
104
- def get_snapshot_volumes(snapshots: List[Dict]) -> List[Dict]:
105
- snapshot_volumes: List[Dict] = []
106
- for snapshot in snapshots:
107
- if snapshot.get("VolumeId"):
108
- snapshot_volumes.append(snapshot)
109
-
110
- return snapshot_volumes
111
-
112
-
113
- @timeit
114
- def load_snapshot_volume_relations(
115
- neo4j_session: neo4j.Session,
116
- data: List[Dict],
117
- current_aws_account_id: str,
118
- update_tag: int,
119
- ) -> None:
120
- ingest_volumes = """
121
- UNWIND $snapshot_volumes_list as volume
122
- MERGE (v:EBSVolume{id: volume.VolumeId})
123
- ON CREATE SET v.firstseen = timestamp()
124
- SET v.lastupdated = $update_tag, v.snapshotid = volume.SnapshotId
125
- WITH v, volume
126
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
127
- MERGE (aa)-[r:RESOURCE]->(v)
128
- ON CREATE SET r.firstseen = timestamp()
129
- SET r.lastupdated = $update_tag
130
- WITH v, volume
131
- MATCH (s:EBSSnapshot{id: volume.SnapshotId})
132
- MERGE (s)-[r:CREATED_FROM]->(v)
133
- ON CREATE SET r.firstseen = timestamp()
134
- SET r.lastupdated = $update_tag
135
- """
136
-
137
- neo4j_session.run(
138
- ingest_volumes,
139
- snapshot_volumes_list=data,
140
- AWS_ACCOUNT_ID=current_aws_account_id,
141
- update_tag=update_tag,
106
+ AWS_ID=current_aws_account_id,
142
107
  )
143
108
 
144
109
 
145
110
  @timeit
146
111
  def cleanup_snapshots(
147
112
  neo4j_session: neo4j.Session,
148
- common_job_parameters: Dict,
113
+ common_job_parameters: Dict[str, Any],
149
114
  ) -> None:
150
- run_cleanup_job(
151
- "aws_import_snapshots_cleanup.json",
152
- neo4j_session,
153
- common_job_parameters,
115
+ GraphJob.from_node_schema(EBSSnapshotSchema(), common_job_parameters).run(
116
+ neo4j_session
154
117
  )
155
118
 
156
119
 
@@ -161,7 +124,7 @@ def sync_ebs_snapshots(
161
124
  regions: List[str],
162
125
  current_aws_account_id: str,
163
126
  update_tag: int,
164
- common_job_parameters: Dict,
127
+ common_job_parameters: Dict[str, Any],
165
128
  ) -> None:
166
129
  for region in regions:
167
130
  logger.debug(
@@ -174,12 +137,12 @@ def sync_ebs_snapshots(
174
137
  region,
175
138
  current_aws_account_id,
176
139
  )
177
- data = get_snapshots(boto3_session, region, snapshots_in_use)
178
- load_snapshots(neo4j_session, data, region, current_aws_account_id, update_tag)
179
- snapshot_volumes = get_snapshot_volumes(data)
180
- load_snapshot_volume_relations(
140
+ raw_data = get_snapshots(boto3_session, region, snapshots_in_use)
141
+ transformed_data = transform_snapshots(raw_data)
142
+ load_snapshots(
181
143
  neo4j_session,
182
- snapshot_volumes,
144
+ transformed_data,
145
+ region,
183
146
  current_aws_account_id,
184
147
  update_tag,
185
148
  )
@@ -53,7 +53,7 @@ def load_subnets(
53
53
  snet.state = subnet.State, snet.assignipv6addressoncreation = subnet.AssignIpv6AddressOnCreation,
54
54
  snet.map_public_ip_on_launch = subnet.MapPublicIpOnLaunch, snet.subnet_arn = subnet.SubnetArn,
55
55
  snet.availability_zone = subnet.AvailabilityZone, snet.availability_zone_id = subnet.AvailabilityZoneId,
56
- snet.subnetid = subnet.SubnetId
56
+ snet.subnet_id = subnet.SubnetId
57
57
  """
58
58
 
59
59
  ingest_subnet_vpc_relations = """
@@ -169,6 +169,22 @@ def _get_containers_from_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, An
169
169
  return containers
170
170
 
171
171
 
172
def transform_ecs_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Copy each task's network interface ID out of its attachments.

    Only the first attachment whose "type" is "ElasticNetworkInterface" is
    inspected. If one of its "details" entries is named "networkInterfaceId",
    that entry's "value" is stored on the task as task["networkInterfaceId"].
    Tasks without such an attachment or detail are left untouched.

    :param tasks: Task dicts; they are modified in place.
    :return: The same list object that was passed in.
    """
    for task in tasks:
        # Later ENI attachments are deliberately ignored, even when the first
        # one carries no networkInterfaceId detail.
        first_eni = next(
            (
                candidate
                for candidate in task.get("attachments", [])
                if candidate.get("type") == "ElasticNetworkInterface"
            ),
            None,
        )
        if first_eni is None:
            continue

        for entry in first_eni.get("details", []):
            if entry.get("name") == "networkInterfaceId":
                task["networkInterfaceId"] = entry.get("value")
                break
    return tasks
186
+
187
+
172
188
  @timeit
173
189
  def load_ecs_clusters(
174
190
  neo4j_session: neo4j.Session,
@@ -407,6 +423,7 @@ def _sync_ecs_task_and_container_defns(
407
423
  boto3_session,
408
424
  region,
409
425
  )
426
+ tasks = transform_ecs_tasks(tasks)
410
427
  containers = _get_containers_from_tasks(tasks)
411
428
  load_ecs_tasks(
412
429
  neo4j_session,
@@ -0,0 +1,275 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import boto3.session
8
+ import neo4j
9
+
10
+ from cartography.client.core.tx import load
11
+ from cartography.graph.job import GraphJob
12
+ from cartography.models.aws.guardduty.findings import GuardDutyFindingSchema
13
+ from cartography.stats import get_stats_client
14
+ from cartography.util import aws_handle_regions
15
+ from cartography.util import aws_paginate
16
+ from cartography.util import merge_module_sync_metadata
17
+ from cartography.util import timeit
18
+
19
+ logger = logging.getLogger(__name__)
20
+ stat_handler = get_stats_client(__name__)
21
+
22
+
23
+ def _get_severity_range_for_threshold(
24
+ severity_threshold: str | None,
25
+ ) -> List[str] | None:
26
+ """
27
+ Convert severity threshold string to GuardDuty numeric severity range.
28
+
29
+ GuardDuty severity mappings:
30
+ - LOW: 1.0-3.9
31
+ - MEDIUM: 4.0-6.9
32
+ - HIGH: 7.0-8.9
33
+ - CRITICAL: 9.0-10.0
34
+
35
+ :param severity_threshold: Severity threshold (LOW, MEDIUM, HIGH, CRITICAL)
36
+ :return: List of numeric severity ranges to include, or None for no filtering
37
+ """
38
+ if not severity_threshold:
39
+ return None
40
+
41
+ threshold_upper = severity_threshold.upper().strip()
42
+
43
+ # Map threshold to numeric ranges - include threshold level and above
44
+ if threshold_upper == "LOW":
45
+ return ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] # All severities
46
+ elif threshold_upper == "MEDIUM":
47
+ return ["4", "5", "6", "7", "8", "9", "10"] # MEDIUM and above
48
+ elif threshold_upper == "HIGH":
49
+ return ["7", "8", "9", "10"] # HIGH and CRITICAL only
50
+ elif threshold_upper == "CRITICAL":
51
+ return ["9", "10"] # CRITICAL only
52
+ else:
53
+ return None
54
+
55
+
56
@aws_handle_regions
def get_detectors(
    boto3_session: boto3.session.Session,
    region: str,
) -> List[str]:
    """
    Return the GuardDuty detector IDs reported for a region.

    :param boto3_session: Session used to build the regional "guardduty" client
    :param region: Region to query
    :return: The "DetectorIds" of the ``list_detectors`` response, or an empty
        list when there are none
    """
    guardduty = boto3_session.client("guardduty", region_name=region)

    # NOTE(review): a single list_detectors call is made; no pagination token is
    # followed. Confirm that one page is always enough.
    found = guardduty.list_detectors().get("DetectorIds", [])

    if found:
        logger.info(f"Found {len(found)} GuardDuty detectors in region {region}")
        return found

    logger.info(f"No GuardDuty detectors found in region {region}")
    return []
76
+
77
+
78
@aws_handle_regions
@timeit
def get_findings(
    boto3_session: boto3.session.Session,
    region: str,
    detector_id: str,
    severity_threshold: str | None = None,
) -> List[Dict[str, Any]]:
    """
    Fetch the unarchived GuardDuty findings of one detector.

    Finding IDs are listed first (paginated), then the full findings are
    retrieved in batches.

    :param boto3_session: Session used to build the regional "guardduty" client
    :param region: Region the detector lives in
    :param detector_id: Detector whose findings are fetched
    :param severity_threshold: Optional named threshold (LOW, MEDIUM, HIGH,
        CRITICAL); when it resolves to a range, only findings at or above the
        range's minimum are listed
    :return: The "Findings" entries of every ``get_findings`` response, or an
        empty list when no finding IDs match
    """
    guardduty = boto3_session.client("guardduty", region_name=region)

    # Archived findings are always filtered out.
    criterion = {"service.archived": {"Equals": ["false"]}}

    allowed_severities = _get_severity_range_for_threshold(severity_threshold)
    if allowed_severities:
        lowest = int(min(map(float, allowed_severities)))
        # The severity condition takes a number while the archived condition
        # takes a list of strings, so the inferred dict type does not fit.
        criterion["severity"] = {"GreaterThanOrEqual": lowest}  # type: ignore

    finding_ids = list(
        aws_paginate(
            guardduty,
            "list_findings",
            "FindingIds",
            DetectorId=detector_id,
            FindingCriteria={"Criterion": criterion},
        )
    )

    if not finding_ids:
        logger.info(f"No findings found for detector {detector_id} in region {region}")
        return []

    # get_findings is called with at most 50 IDs per request.
    chunk_size = 50
    collected: List[Dict[str, Any]] = []
    for start in range(0, len(finding_ids), chunk_size):
        response = guardduty.get_findings(
            DetectorId=detector_id,
            FindingIds=finding_ids[start : start + chunk_size],
        )
        collected.extend(response.get("Findings", []))

    logger.info(
        f"Retrieved {len(collected)} findings for detector {detector_id} in region {region}"
    )
    return collected
138
+
139
+
140
def transform_findings(findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Flatten GuardDuty API findings into flat dicts with a fixed key set.

    Every returned dict carries the same keys, including "resource_type" and
    "resource_id". "resource_id" is filled in only for the resource types
    handled here and is None otherwise:
    - "Instance": the "InstanceId" of "InstanceDetails"
    - "S3Bucket": the "Name" of the first entry of "S3BucketDetails"

    :param findings: Finding dicts; each must contain "Id" (KeyError otherwise).
        All other fields are optional.
    :return: One new dict per finding, in input order.
    """
    transformed: List[Dict[str, Any]] = []
    for f in findings:
        # ``or {}`` also covers a finding that carries an explicit
        # ``"Resource": None`` rather than omitting the key.
        resource = f.get("Resource") or {}
        resource_type = resource.get("ResourceType")

        # Default first so the key is present on every path: an S3Bucket with
        # no bucket details and an unhandled resource type both end up None.
        resource_id = None
        if resource_type == "Instance":
            details = resource.get("InstanceDetails") or {}
            resource_id = details.get("InstanceId")
        elif resource_type == "S3Bucket":
            buckets = resource.get("S3BucketDetails") or []
            if buckets:
                # Only the first listed bucket is recorded.
                resource_id = buckets[0].get("Name")

        transformed.append(
            {
                "id": f["Id"],
                "arn": f.get("Arn"),
                "type": f.get("Type"),
                "severity": f.get("Severity"),
                "title": f.get("Title"),
                "description": f.get("Description"),
                "confidence": f.get("Confidence"),
                "eventfirstseen": f.get("EventFirstSeen"),
                "eventlastseen": f.get("EventLastSeen"),
                "accountid": f.get("AccountId"),
                "region": f.get("Region"),
                "detectorid": f.get("DetectorId"),
                "archived": f.get("Archived"),
                "resource_type": resource_type,
                "resource_id": resource_id,
            }
        )

    return transformed
178
+
179
+
180
@timeit
def load_guardduty_findings(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    region: str,
    aws_account_id: str,
    update_tag: int,
) -> None:
    """
    Write transformed GuardDuty findings to the graph.

    :param neo4j_session: Session the load runs against
    :param data: Transformed finding dicts
    :param region: Passed to the load as ``Region``
    :param aws_account_id: Passed to the load as ``AWS_ID``
    :param update_tag: Passed to the load as ``lastupdated``
    """
    logger.info(
        f"Loading {len(data)} GuardDuty findings for region {region} into graph."
    )

    finding_schema = GuardDutyFindingSchema()
    load(
        neo4j_session,
        finding_schema,
        data,
        lastupdated=update_tag,
        Region=region,
        AWS_ID=aws_account_id,
    )
203
+
204
+
205
@timeit
def cleanup_guardduty(
    neo4j_session: neo4j.Session, common_job_parameters: Dict
) -> None:
    """
    Build the cleanup job for the GuardDuty finding schema and run it.

    :param neo4j_session: Session the cleanup job runs against
    :param common_job_parameters: Parameters handed to the job builder
    """
    logger.debug("Running GuardDuty cleanup job.")
    GraphJob.from_node_schema(
        GuardDutyFindingSchema(),
        common_job_parameters,
    ).run(neo4j_session)
217
+
218
+
219
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Sync GuardDuty findings for every given region, then clean up.

    For each region the detectors are listed, the findings of all detectors
    are fetched, transformed and loaded. Regions without detectors are
    skipped. Cleanup and the sync-metadata update run once, after all regions.

    :param neo4j_session: Session used for loading, cleanup and metadata
    :param boto3_session: Session used for the GuardDuty API calls
    :param regions: Regions to sync
    :param current_aws_account_id: Account the findings are loaded under
    :param update_tag: Update tag applied to loaded data and sync metadata
    :param common_job_parameters: Parameters for the cleanup job; the optional
        "aws_guardduty_severity_threshold" entry is used as the severity filter
    """
    threshold = common_job_parameters.get("aws_guardduty_severity_threshold")

    for region in regions:
        logger.info(
            f"Syncing GuardDuty findings for {region} in account {current_aws_account_id}"
        )

        detector_ids = get_detectors(boto3_session, region)
        if not detector_ids:
            logger.info(f"No GuardDuty detectors found in region {region}, skipping.")
            continue

        # Findings of all detectors in the region are loaded together.
        region_findings = [
            finding
            for detector_id in detector_ids
            for finding in get_findings(boto3_session, region, detector_id, threshold)
        ]

        load_guardduty_findings(
            neo4j_session,
            transform_findings(region_findings),
            region,
            current_aws_account_id,
            update_tag,
        )

    # Once per sync, not once per region.
    cleanup_guardduty(neo4j_session, common_job_parameters)

    merge_module_sync_metadata(
        neo4j_session,
        group_type="AWSAccount",
        group_id=current_aws_account_id,
        synced_type="GuardDutyFinding",
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
@@ -18,6 +18,7 @@ from . import eks
18
18
  from . import elasticache
19
19
  from . import elasticsearch
20
20
  from . import emr
21
+ from . import guardduty
21
22
  from . import iam
22
23
  from . import identitycenter
23
24
  from . import inspector
@@ -111,5 +112,6 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
111
112
  "cloudtrail_management_events": cloudtrail_management_events.sync,
112
113
  "cloudwatch": cloudwatch.sync,
113
114
  "efs": efs.sync,
115
+ "guardduty": guardduty.sync,
114
116
  "codebuild": codebuild.sync,
115
117
  }