cartography 0.107.0rc3__py3-none-any.whl → 0.108.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (27)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +10 -0
  3. cartography/config.py +5 -0
  4. cartography/data/indexes.cypher +0 -8
  5. cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
  6. cartography/intel/aws/__init__.py +1 -0
  7. cartography/intel/aws/ec2/security_groups.py +140 -122
  8. cartography/intel/aws/ec2/snapshots.py +47 -84
  9. cartography/intel/aws/guardduty.py +275 -0
  10. cartography/intel/aws/resources.py +2 -0
  11. cartography/intel/github/repos.py +370 -28
  12. cartography/models/aws/ec2/security_group_rules.py +109 -0
  13. cartography/models/aws/ec2/security_groups.py +90 -0
  14. cartography/models/aws/ec2/snapshots.py +58 -0
  15. cartography/models/aws/ec2/volumes.py +20 -0
  16. cartography/models/aws/guardduty/__init__.py +1 -0
  17. cartography/models/aws/guardduty/findings.py +102 -0
  18. cartography/models/github/dependencies.py +74 -0
  19. cartography/models/github/manifests.py +49 -0
  20. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/METADATA +3 -3
  21. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/RECORD +25 -19
  22. cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
  23. cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
  24. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/WHEEL +0 -0
  25. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/entry_points.txt +0 -0
  26. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/licenses/LICENSE +0 -0
  27. {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.107.0rc3'
21
- __version_tuple__ = version_tuple = (0, 107, 0, 'rc3')
20
+ __version__ = version = '0.108.0rc1'
21
+ __version_tuple__ = version_tuple = (0, 108, 0, 'rc1')
cartography/cli.py CHANGED
@@ -264,6 +264,16 @@ class CLI:
264
264
  " If not specified, cartography by default will run all AWS sync modules available."
265
265
  ),
266
266
  )
267
+ parser.add_argument(
268
+ "--aws-guardduty-severity-threshold",
269
+ type=str,
270
+ default=None,
271
+ help=(
272
+ "GuardDuty severity threshold filter. Only findings at or above this severity level will be synced. "
273
+ "Valid values: LOW, MEDIUM, HIGH, CRITICAL. If not specified, all findings (except archived) will be synced. "
274
+ "Example: 'HIGH' will sync only HIGH and CRITICAL findings, filtering out LOW and MEDIUM severity findings."
275
+ ),
276
+ )
267
277
  parser.add_argument(
268
278
  "--analysis-job-directory",
269
279
  type=str,
cartography/config.py CHANGED
@@ -53,6 +53,9 @@ class Config:
53
53
  :param entra_client_secret: Client Secret for connecting in a Service Principal Authentication approach. Optional.
54
54
  :type aws_requested_syncs: str
55
55
  :param aws_requested_syncs: Comma-separated list of AWS resources to sync. Optional.
56
+ :type aws_guardduty_severity_threshold: str
57
+ :param aws_guardduty_severity_threshold: GuardDuty severity threshold filter. Only findings at or above this
58
+ severity level will be synced. Valid values: LOW, MEDIUM, HIGH, CRITICAL. Optional.
56
59
  :type analysis_job_directory: str
57
60
  :param analysis_job_directory: Path to a directory tree containing analysis jobs to run. Optional.
58
61
  :type oci_sync_all_profiles: bool
@@ -185,6 +188,7 @@ class Config:
185
188
  entra_client_id=None,
186
189
  entra_client_secret=None,
187
190
  aws_requested_syncs=None,
191
+ aws_guardduty_severity_threshold=None,
188
192
  analysis_job_directory=None,
189
193
  oci_sync_all_profiles=None,
190
194
  okta_org_id=None,
@@ -268,6 +272,7 @@ class Config:
268
272
  self.entra_client_id = entra_client_id
269
273
  self.entra_client_secret = entra_client_secret
270
274
  self.aws_requested_syncs = aws_requested_syncs
275
+ self.aws_guardduty_severity_threshold = aws_guardduty_severity_threshold
271
276
  self.analysis_job_directory = analysis_job_directory
272
277
  self.oci_sync_all_profiles = oci_sync_all_profiles
273
278
  self.okta_org_id = okta_org_id
cartography/data/indexes.cypher CHANGED
@@ -81,8 +81,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:DODroplet) ON (n.id);
81
81
  CREATE INDEX IF NOT EXISTS FOR (n:DODroplet) ON (n.lastupdated);
82
82
  CREATE INDEX IF NOT EXISTS FOR (n:DOProject) ON (n.id);
83
83
  CREATE INDEX IF NOT EXISTS FOR (n:DOProject) ON (n.lastupdated);
84
- CREATE INDEX IF NOT EXISTS FOR (n:EBSSnapshot) ON (n.id);
85
- CREATE INDEX IF NOT EXISTS FOR (n:EBSSnapshot) ON (n.lastupdated);
86
84
  CREATE INDEX IF NOT EXISTS FOR (n:EC2KeyPair) ON (n.keyfingerprint);
87
85
  CREATE INDEX IF NOT EXISTS FOR (n:EC2ReservedInstance) ON (n.id);
88
86
  CREATE INDEX IF NOT EXISTS FOR (n:EC2ReservedInstance) ON (n.lastupdated);
@@ -156,14 +154,8 @@ CREATE INDEX IF NOT EXISTS FOR (n:GSuiteUser) ON (n.lastupdated);
156
154
  CREATE INDEX IF NOT EXISTS FOR (n:Ip) ON (n.id);
157
155
  CREATE INDEX IF NOT EXISTS FOR (n:Ip) ON (n.ip);
158
156
  CREATE INDEX IF NOT EXISTS FOR (n:Ip) ON (n.lastupdated);
159
- CREATE INDEX IF NOT EXISTS FOR (n:IpPermissionInbound) ON (n.ruleid);
160
- CREATE INDEX IF NOT EXISTS FOR (n:IpPermissionInbound) ON (n.lastupdated);
161
- CREATE INDEX IF NOT EXISTS FOR (n:IpPermissionsEgress) ON (n.ruleid);
162
- CREATE INDEX IF NOT EXISTS FOR (n:IpPermissionsEgress) ON (n.lastupdated);
163
157
  CREATE INDEX IF NOT EXISTS FOR (n:IpRange) ON (n.id);
164
158
  CREATE INDEX IF NOT EXISTS FOR (n:IpRange) ON (n.lastupdated);
165
- CREATE INDEX IF NOT EXISTS FOR (n:IpRule) ON (n.ruleid);
166
- CREATE INDEX IF NOT EXISTS FOR (n:IpRule) ON (n.lastupdated);
167
159
  CREATE INDEX IF NOT EXISTS FOR (n:JamfComputerGroup) ON (n.id);
168
160
  CREATE INDEX IF NOT EXISTS FOR (n:JamfComputerGroup) ON (n.lastupdated);
169
161
  CREATE INDEX IF NOT EXISTS FOR (n:KMSKey) ON (n.id);
cartography/data/jobs/cleanup/github_repos_cleanup.json CHANGED
@@ -19,6 +19,7 @@
19
19
  "iterative": true,
20
20
  "iterationsize": 100
21
21
  },
22
+
22
23
  {
23
24
  "query": "MATCH (:GitHubBranch)-[r:BRANCH]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
24
25
  "iterative": true,
@@ -39,6 +40,7 @@
39
40
  "iterative": true,
40
41
  "iterationsize": 100
41
42
  },
43
+
42
44
  {
43
45
  "query": "MATCH (:GitHubUser)-[r:OUTSIDE_COLLAB_ADMIN]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
44
46
  "iterative": true,
cartography/intel/aws/__init__.py CHANGED
@@ -310,6 +310,7 @@ def start_aws_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
310
310
  common_job_parameters = {
311
311
  "UPDATE_TAG": config.update_tag,
312
312
  "permission_relationships_file": config.permission_relationships_file,
313
+ "aws_guardduty_severity_threshold": config.aws_guardduty_severity_threshold,
313
314
  "aws_cloudtrail_management_events_lookback_hours": config.aws_cloudtrail_management_events_lookback_hours,
314
315
  }
315
316
  try:
cartography/intel/aws/ec2/security_groups.py CHANGED
@@ -1,17 +1,22 @@
1
1
  import logging
2
- from string import Template
2
+ from collections import namedtuple
3
+ from typing import Any
3
4
  from typing import Dict
4
5
  from typing import List
5
6
 
6
7
  import boto3
7
8
  import neo4j
8
9
 
10
+ from cartography.client.core.tx import load
9
11
  from cartography.graph.job import GraphJob
12
+ from cartography.models.aws.ec2.security_group_rules import IpPermissionInboundSchema
13
+ from cartography.models.aws.ec2.security_group_rules import IpRangeSchema
14
+ from cartography.models.aws.ec2.security_group_rules import IpRuleSchema
15
+ from cartography.models.aws.ec2.security_groups import EC2SecurityGroupSchema
10
16
  from cartography.models.aws.ec2.securitygroup_instance import (
11
17
  EC2SecurityGroupInstanceSchema,
12
18
  )
13
19
  from cartography.util import aws_handle_regions
14
- from cartography.util import run_cleanup_job
15
20
  from cartography.util import timeit
16
21
 
17
22
  from .util import get_botocore_config
@@ -37,138 +42,146 @@ def get_ec2_security_group_data(
37
42
  return security_groups
38
43
 
39
44
 
45
+ Ec2SecurityGroupData = namedtuple(
46
+ "Ec2SecurityGroupData",
47
+ ["groups", "inbound_rules", "egress_rules", "ranges"],
48
+ )
49
+
50
+
51
+ def transform_ec2_security_group_data(
52
+ data: List[Dict[str, Any]],
53
+ ) -> Ec2SecurityGroupData:
54
+ groups: List[Dict[str, Any]] = []
55
+ inbound_rules: List[Dict[str, Any]] = []
56
+ egress_rules: List[Dict[str, Any]] = []
57
+ ranges: List[Dict[str, Any]] = []
58
+
59
+ for group in data:
60
+ group_record = {
61
+ "GroupId": group["GroupId"],
62
+ "GroupName": group.get("GroupName"),
63
+ "Description": group.get("Description"),
64
+ "VpcId": group.get("VpcId"),
65
+ }
66
+ # Collect referenced security groups for relationship loading
67
+ source_group_ids: set[str] = set()
68
+
69
+ for rule_type, target in (
70
+ ("IpPermissions", inbound_rules),
71
+ ("IpPermissionsEgress", egress_rules),
72
+ ):
73
+ for rule in group.get(rule_type, []):
74
+ protocol = rule.get("IpProtocol", "all")
75
+ from_port = rule.get("FromPort")
76
+ to_port = rule.get("ToPort")
77
+ rule_id = (
78
+ f"{group['GroupId']}/{rule_type}/{from_port}{to_port}{protocol}"
79
+ )
80
+ target.append(
81
+ {
82
+ "RuleId": rule_id,
83
+ "GroupId": group["GroupId"],
84
+ "Protocol": protocol,
85
+ "FromPort": from_port,
86
+ "ToPort": to_port,
87
+ },
88
+ )
89
+ for ip_range in rule.get("IpRanges", []):
90
+ ranges.append({"RangeId": ip_range["CidrIp"], "RuleId": rule_id})
91
+ for pair in rule.get("UserIdGroupPairs", []):
92
+ sg_id = pair.get("GroupId")
93
+ if sg_id:
94
+ source_group_ids.add(sg_id)
95
+
96
+ group_record["SOURCE_GROUP_IDS"] = list(source_group_ids)
97
+ groups.append(group_record)
98
+
99
+ return Ec2SecurityGroupData(
100
+ groups=groups,
101
+ inbound_rules=inbound_rules,
102
+ egress_rules=egress_rules,
103
+ ranges=ranges,
104
+ )
105
+
106
+
40
107
  @timeit
41
- def load_ec2_security_group_rule(
108
+ def load_ip_rules(
42
109
  neo4j_session: neo4j.Session,
43
- group: Dict,
44
- rule_type: str,
110
+ data: List[Dict[str, Any]],
111
+ inbound: bool,
112
+ region: str,
113
+ aws_account_id: str,
45
114
  update_tag: int,
46
115
  ) -> None:
47
- INGEST_RULE_TEMPLATE = Template(
48
- """
49
- MERGE (rule:$rule_label{ruleid: $RuleId})
50
- ON CREATE SET rule :IpRule, rule.firstseen = timestamp(), rule.fromport = $FromPort, rule.toport = $ToPort,
51
- rule.protocol = $Protocol
52
- SET rule.lastupdated = $update_tag
53
- WITH rule
54
- MATCH (group:EC2SecurityGroup{groupid: $GroupId})
55
- MERGE (group)<-[r:MEMBER_OF_EC2_SECURITY_GROUP]-(rule)
56
- ON CREATE SET r.firstseen = timestamp()
57
- SET r.lastupdated = $update_tag;
58
- """,
116
+ schema = IpPermissionInboundSchema() if inbound else IpRuleSchema()
117
+ load(
118
+ neo4j_session,
119
+ schema,
120
+ data,
121
+ Region=region,
122
+ AWS_ID=aws_account_id,
123
+ lastupdated=update_tag,
59
124
  )
60
125
 
61
- ingest_rule_group_pair = """
62
- MERGE (group:EC2SecurityGroup{id: $GroupId})
63
- ON CREATE SET group.firstseen = timestamp(), group.groupid = $GroupId
64
- SET group.lastupdated = $update_tag
65
- WITH group
66
- MATCH (inbound:IpRule{ruleid: $RuleId})
67
- MERGE (inbound)-[r:MEMBER_OF_EC2_SECURITY_GROUP]->(group)
68
- ON CREATE SET r.firstseen = timestamp()
69
- SET r.lastupdated = $update_tag
70
- """
71
-
72
- ingest_range = """
73
- MERGE (range:IpRange{id: $RangeId})
74
- ON CREATE SET range.firstseen = timestamp(), range.range = $RangeId
75
- SET range.lastupdated = $update_tag
76
- WITH range
77
- MATCH (rule:IpRule{ruleid: $RuleId})
78
- MERGE (rule)<-[r:MEMBER_OF_IP_RULE]-(range)
79
- ON CREATE SET r.firstseen = timestamp()
80
- SET r.lastupdated = $update_tag
81
- """
82
-
83
- group_id = group["GroupId"]
84
- rule_type_map = {
85
- "IpPermissions": "IpPermissionInbound",
86
- "IpPermissionsEgress": "IpPermissionEgress",
87
- }
88
-
89
- if group.get(rule_type):
90
- for rule in group[rule_type]:
91
- protocol = rule.get("IpProtocol", "all")
92
- from_port = rule.get("FromPort")
93
- to_port = rule.get("ToPort")
94
-
95
- ruleid = f"{group_id}/{rule_type}/{from_port}{to_port}{protocol}"
96
- # NOTE Cypher query syntax is incompatible with Python string formatting, so we have to do this awkward
97
- # NOTE manual formatting instead.
98
- neo4j_session.run(
99
- INGEST_RULE_TEMPLATE.safe_substitute(
100
- rule_label=rule_type_map[rule_type],
101
- ),
102
- RuleId=ruleid,
103
- FromPort=from_port,
104
- ToPort=to_port,
105
- Protocol=protocol,
106
- GroupId=group_id,
107
- update_tag=update_tag,
108
- )
109
-
110
- neo4j_session.run(
111
- ingest_rule_group_pair,
112
- GroupId=group_id,
113
- RuleId=ruleid,
114
- update_tag=update_tag,
115
- )
116
-
117
- for ip_range in rule["IpRanges"]:
118
- range_id = ip_range["CidrIp"]
119
- neo4j_session.run(
120
- ingest_range,
121
- RangeId=range_id,
122
- RuleId=ruleid,
123
- update_tag=update_tag,
124
- )
126
+
127
+ @timeit
128
+ def load_ip_ranges(
129
+ neo4j_session: neo4j.Session,
130
+ data: List[Dict[str, Any]],
131
+ region: str,
132
+ aws_account_id: str,
133
+ update_tag: int,
134
+ ) -> None:
135
+ load(
136
+ neo4j_session,
137
+ IpRangeSchema(),
138
+ data,
139
+ Region=region,
140
+ AWS_ID=aws_account_id,
141
+ lastupdated=update_tag,
142
+ )
125
143
 
126
144
 
127
145
  @timeit
128
146
  def load_ec2_security_groupinfo(
129
147
  neo4j_session: neo4j.Session,
130
- data: List[Dict],
148
+ data: Ec2SecurityGroupData,
131
149
  region: str,
132
150
  current_aws_account_id: str,
133
151
  update_tag: int,
134
152
  ) -> None:
135
- ingest_security_group = """
136
- MERGE (group:EC2SecurityGroup{id: $GroupId})
137
- ON CREATE SET group.firstseen = timestamp(), group.groupid = $GroupId
138
- SET group.name = $GroupName, group.description = $Description, group.region = $Region,
139
- group.lastupdated = $update_tag
140
- WITH group
141
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
142
- MERGE (aa)-[r:RESOURCE]->(group)
143
- ON CREATE SET r.firstseen = timestamp()
144
- SET r.lastupdated = $update_tag
145
- WITH group
146
- MATCH (vpc:AWSVpc{id: $VpcId})
147
- MERGE (vpc)-[rg:MEMBER_OF_EC2_SECURITY_GROUP]->(group)
148
- ON CREATE SET rg.firstseen = timestamp()
149
- """
150
-
151
- for group in data:
152
- group_id = group["GroupId"]
153
-
154
- neo4j_session.run(
155
- ingest_security_group,
156
- GroupId=group_id,
157
- GroupName=group.get("GroupName"),
158
- Description=group.get("Description"),
159
- VpcId=group.get("VpcId", None),
160
- Region=region,
161
- AWS_ACCOUNT_ID=current_aws_account_id,
162
- update_tag=update_tag,
163
- )
153
+ load(
154
+ neo4j_session,
155
+ EC2SecurityGroupSchema(),
156
+ data.groups,
157
+ Region=region,
158
+ AWS_ID=current_aws_account_id,
159
+ lastupdated=update_tag,
160
+ )
164
161
 
165
- load_ec2_security_group_rule(neo4j_session, group, "IpPermissions", update_tag)
166
- load_ec2_security_group_rule(
167
- neo4j_session,
168
- group,
169
- "IpPermissionsEgress",
170
- update_tag,
171
- )
162
+ load_ip_rules(
163
+ neo4j_session,
164
+ data.inbound_rules,
165
+ inbound=True,
166
+ region=region,
167
+ aws_account_id=current_aws_account_id,
168
+ update_tag=update_tag,
169
+ )
170
+ load_ip_rules(
171
+ neo4j_session,
172
+ data.egress_rules,
173
+ inbound=False,
174
+ region=region,
175
+ aws_account_id=current_aws_account_id,
176
+ update_tag=update_tag,
177
+ )
178
+ load_ip_ranges(
179
+ neo4j_session,
180
+ data.ranges,
181
+ region,
182
+ current_aws_account_id,
183
+ update_tag,
184
+ )
172
185
 
173
186
 
174
187
  @timeit
@@ -176,11 +189,15 @@ def cleanup_ec2_security_groupinfo(
176
189
  neo4j_session: neo4j.Session,
177
190
  common_job_parameters: Dict,
178
191
  ) -> None:
179
- run_cleanup_job(
180
- "aws_import_ec2_security_groupinfo_cleanup.json",
181
- neo4j_session,
192
+ GraphJob.from_node_schema(
193
+ EC2SecurityGroupSchema(),
182
194
  common_job_parameters,
195
+ ).run(neo4j_session)
196
+ GraphJob.from_node_schema(IpPermissionInboundSchema(), common_job_parameters).run(
197
+ neo4j_session,
183
198
  )
199
+ GraphJob.from_node_schema(IpRuleSchema(), common_job_parameters).run(neo4j_session)
200
+ GraphJob.from_node_schema(IpRangeSchema(), common_job_parameters).run(neo4j_session)
184
201
  GraphJob.from_node_schema(
185
202
  EC2SecurityGroupInstanceSchema(),
186
203
  common_job_parameters,
@@ -203,9 +220,10 @@ def sync_ec2_security_groupinfo(
203
220
  current_aws_account_id,
204
221
  )
205
222
  data = get_ec2_security_group_data(boto3_session, region)
223
+ transformed = transform_ec2_security_group_data(data)
206
224
  load_ec2_security_groupinfo(
207
225
  neo4j_session,
208
- data,
226
+ transformed,
209
227
  region,
210
228
  current_aws_account_id,
211
229
  update_tag,
cartography/intel/aws/ec2/snapshots.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from typing import Any
2
3
  from typing import Dict
3
4
  from typing import List
4
5
 
@@ -6,8 +7,11 @@ import boto3
6
7
  import neo4j
7
8
  from botocore.exceptions import ClientError
8
9
 
10
+ from cartography.client.core.tx import load
11
+ from cartography.client.core.tx import read_list_of_values_tx
12
+ from cartography.graph.job import GraphJob
13
+ from cartography.models.aws.ec2.snapshots import EBSSnapshotSchema
9
14
  from cartography.util import aws_handle_regions
10
- from cartography.util import run_cleanup_job
11
15
  from cartography.util import timeit
12
16
 
13
17
  logger = logging.getLogger(__name__)
@@ -24,12 +28,13 @@ def get_snapshots_in_use(
24
28
  WHERE v.region = $Region
25
29
  RETURN v.snapshotid as snapshot
26
30
  """
27
- results = neo4j_session.run(
31
+ results = read_list_of_values_tx(
32
+ neo4j_session,
28
33
  query,
29
34
  AWS_ACCOUNT_ID=current_aws_account_id,
30
35
  Region=region,
31
36
  )
32
- return [r["snapshot"] for r in results if r["snapshot"]]
37
+ return [str(snapshot) for snapshot in results if snapshot]
33
38
 
34
39
 
35
40
  @timeit
@@ -45,7 +50,6 @@ def get_snapshots(
45
50
  for page in paginator.paginate(OwnerIds=["self"]):
46
51
  snapshots.extend(page["Snapshots"])
47
52
 
48
- # fetch in-use snapshots not in self_owned snapshots
49
53
  self_owned_snapshot_ids = {s["SnapshotId"] for s in snapshots}
50
54
  other_snapshot_ids = set(in_use_snapshot_ids) - self_owned_snapshot_ids
51
55
  if other_snapshot_ids:
@@ -55,8 +59,7 @@ def get_snapshots(
55
59
  except ClientError as e:
56
60
  if e.response["Error"]["Code"] == "InvalidSnapshot.NotFound":
57
61
  logger.warning(
58
- f"Failed to retrieve page of in-use, \
59
- not owned snapshots. Continuing anyway. Error - {e}",
62
+ f"Failed to retrieve page of in-use, not owned snapshots. Continuing anyway. Error - {e}"
60
63
  )
61
64
  else:
62
65
  raise
@@ -64,93 +67,53 @@ def get_snapshots(
64
67
  return snapshots
65
68
 
66
69
 
70
+ def transform_snapshots(snapshots: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
71
+ transformed: List[Dict[str, Any]] = []
72
+ for snap in snapshots:
73
+ transformed.append(
74
+ {
75
+ "SnapshotId": snap["SnapshotId"],
76
+ "Description": snap.get("Description"),
77
+ "Encrypted": snap.get("Encrypted"),
78
+ "Progress": snap.get("Progress"),
79
+ "StartTime": snap.get("StartTime"),
80
+ "State": snap.get("State"),
81
+ "StateMessage": snap.get("StateMessage"),
82
+ "VolumeId": snap.get("VolumeId"),
83
+ "VolumeSize": snap.get("VolumeSize"),
84
+ "OutpostArn": snap.get("OutpostArn"),
85
+ "DataEncryptionKeyId": snap.get("DataEncryptionKeyId"),
86
+ "KmsKeyId": snap.get("KmsKeyId"),
87
+ }
88
+ )
89
+ return transformed
90
+
91
+
67
92
  @timeit
68
93
  def load_snapshots(
69
94
  neo4j_session: neo4j.Session,
70
- data: List[Dict],
95
+ data: List[Dict[str, Any]],
71
96
  region: str,
72
97
  current_aws_account_id: str,
73
98
  update_tag: int,
74
99
  ) -> None:
75
- ingest_snapshots = """
76
- UNWIND $snapshots_list as snapshot
77
- MERGE (s:EBSSnapshot{id: snapshot.SnapshotId})
78
- ON CREATE SET s.firstseen = timestamp()
79
- SET s.lastupdated = $update_tag, s.description = snapshot.Description, s.encrypted = snapshot.Encrypted,
80
- s.progress = snapshot.Progress, s.starttime = snapshot.StartTime, s.state = snapshot.State,
81
- s.statemessage = snapshot.StateMessage, s.volumeid = snapshot.VolumeId, s.volumesize = snapshot.VolumeSize,
82
- s.outpostarn = snapshot.OutpostArn, s.dataencryptionkeyid = snapshot.DataEncryptionKeyId,
83
- s.kmskeyid = snapshot.KmsKeyId, s.region=$Region
84
- WITH s
85
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
86
- MERGE (aa)-[r:RESOURCE]->(s)
87
- ON CREATE SET r.firstseen = timestamp()
88
- SET r.lastupdated = $update_tag
89
- """
90
-
91
- for snapshot in data:
92
- snapshot["StartTime"] = str(snapshot["StartTime"])
93
-
94
- neo4j_session.run(
95
- ingest_snapshots,
96
- snapshots_list=data,
97
- AWS_ACCOUNT_ID=current_aws_account_id,
100
+ load(
101
+ neo4j_session,
102
+ EBSSnapshotSchema(),
103
+ data,
104
+ lastupdated=update_tag,
98
105
  Region=region,
99
- update_tag=update_tag,
100
- )
101
-
102
-
103
- @timeit
104
- def get_snapshot_volumes(snapshots: List[Dict]) -> List[Dict]:
105
- snapshot_volumes: List[Dict] = []
106
- for snapshot in snapshots:
107
- if snapshot.get("VolumeId"):
108
- snapshot_volumes.append(snapshot)
109
-
110
- return snapshot_volumes
111
-
112
-
113
- @timeit
114
- def load_snapshot_volume_relations(
115
- neo4j_session: neo4j.Session,
116
- data: List[Dict],
117
- current_aws_account_id: str,
118
- update_tag: int,
119
- ) -> None:
120
- ingest_volumes = """
121
- UNWIND $snapshot_volumes_list as volume
122
- MERGE (v:EBSVolume{id: volume.VolumeId})
123
- ON CREATE SET v.firstseen = timestamp()
124
- SET v.lastupdated = $update_tag, v.snapshotid = volume.SnapshotId
125
- WITH v, volume
126
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
127
- MERGE (aa)-[r:RESOURCE]->(v)
128
- ON CREATE SET r.firstseen = timestamp()
129
- SET r.lastupdated = $update_tag
130
- WITH v, volume
131
- MATCH (s:EBSSnapshot{id: volume.SnapshotId})
132
- MERGE (s)-[r:CREATED_FROM]->(v)
133
- ON CREATE SET r.firstseen = timestamp()
134
- SET r.lastupdated = $update_tag
135
- """
136
-
137
- neo4j_session.run(
138
- ingest_volumes,
139
- snapshot_volumes_list=data,
140
- AWS_ACCOUNT_ID=current_aws_account_id,
141
- update_tag=update_tag,
106
+ AWS_ID=current_aws_account_id,
142
107
  )
143
108
 
144
109
 
145
110
  @timeit
146
111
  def cleanup_snapshots(
147
112
  neo4j_session: neo4j.Session,
148
- common_job_parameters: Dict,
113
+ common_job_parameters: Dict[str, Any],
149
114
  ) -> None:
150
- run_cleanup_job(
151
- "aws_import_snapshots_cleanup.json",
152
- neo4j_session,
153
- common_job_parameters,
115
+ GraphJob.from_node_schema(EBSSnapshotSchema(), common_job_parameters).run(
116
+ neo4j_session
154
117
  )
155
118
 
156
119
 
@@ -161,7 +124,7 @@ def sync_ebs_snapshots(
161
124
  regions: List[str],
162
125
  current_aws_account_id: str,
163
126
  update_tag: int,
164
- common_job_parameters: Dict,
127
+ common_job_parameters: Dict[str, Any],
165
128
  ) -> None:
166
129
  for region in regions:
167
130
  logger.debug(
@@ -174,12 +137,12 @@ def sync_ebs_snapshots(
174
137
  region,
175
138
  current_aws_account_id,
176
139
  )
177
- data = get_snapshots(boto3_session, region, snapshots_in_use)
178
- load_snapshots(neo4j_session, data, region, current_aws_account_id, update_tag)
179
- snapshot_volumes = get_snapshot_volumes(data)
180
- load_snapshot_volume_relations(
140
+ raw_data = get_snapshots(boto3_session, region, snapshots_in_use)
141
+ transformed_data = transform_snapshots(raw_data)
142
+ load_snapshots(
181
143
  neo4j_session,
182
- snapshot_volumes,
144
+ transformed_data,
145
+ region,
183
146
  current_aws_account_id,
184
147
  update_tag,
185
148
  )