cartography-0.107.0rc2-py3-none-any.whl → cartography-0.108.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (58)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +10 -0
  3. cartography/config.py +5 -0
  4. cartography/data/indexes.cypher +0 -10
  5. cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
  6. cartography/intel/aws/__init__.py +1 -0
  7. cartography/intel/aws/cloudtrail.py +17 -4
  8. cartography/intel/aws/cloudtrail_management_events.py +560 -16
  9. cartography/intel/aws/cloudwatch.py +150 -4
  10. cartography/intel/aws/ec2/security_groups.py +140 -122
  11. cartography/intel/aws/ec2/snapshots.py +47 -84
  12. cartography/intel/aws/ec2/subnets.py +37 -63
  13. cartography/intel/aws/ecr.py +55 -80
  14. cartography/intel/aws/ecs.py +17 -0
  15. cartography/intel/aws/elasticache.py +102 -79
  16. cartography/intel/aws/guardduty.py +275 -0
  17. cartography/intel/aws/resources.py +2 -0
  18. cartography/intel/aws/secretsmanager.py +62 -44
  19. cartography/intel/github/repos.py +370 -28
  20. cartography/intel/sentinelone/__init__.py +8 -2
  21. cartography/intel/sentinelone/application.py +248 -0
  22. cartography/intel/sentinelone/utils.py +20 -1
  23. cartography/models/aws/cloudtrail/management_events.py +95 -6
  24. cartography/models/aws/cloudtrail/trail.py +21 -0
  25. cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
  26. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  27. cartography/models/aws/ec2/networkinterfaces.py +2 -0
  28. cartography/models/aws/ec2/security_group_rules.py +109 -0
  29. cartography/models/aws/ec2/security_groups.py +90 -0
  30. cartography/models/aws/ec2/snapshots.py +58 -0
  31. cartography/models/aws/ec2/subnet_instance.py +2 -0
  32. cartography/models/aws/ec2/subnet_networkinterface.py +2 -0
  33. cartography/models/aws/ec2/subnets.py +65 -0
  34. cartography/models/aws/ec2/volumes.py +20 -0
  35. cartography/models/aws/ecr/__init__.py +0 -0
  36. cartography/models/aws/ecr/image.py +41 -0
  37. cartography/models/aws/ecr/repository.py +72 -0
  38. cartography/models/aws/ecr/repository_image.py +95 -0
  39. cartography/models/aws/ecs/tasks.py +24 -1
  40. cartography/models/aws/elasticache/__init__.py +0 -0
  41. cartography/models/aws/elasticache/cluster.py +65 -0
  42. cartography/models/aws/elasticache/topic.py +67 -0
  43. cartography/models/aws/guardduty/__init__.py +1 -0
  44. cartography/models/aws/guardduty/findings.py +102 -0
  45. cartography/models/aws/secretsmanager/secret.py +106 -0
  46. cartography/models/github/dependencies.py +74 -0
  47. cartography/models/github/manifests.py +49 -0
  48. cartography/models/sentinelone/application.py +44 -0
  49. cartography/models/sentinelone/application_version.py +96 -0
  50. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/METADATA +3 -3
  51. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/RECORD +55 -36
  52. cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
  53. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  54. cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
  55. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/WHEEL +0 -0
  56. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/entry_points.txt +0 -0
  57. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/licenses/LICENSE +0 -0
  58. {cartography-0.107.0rc2.dist-info → cartography-0.108.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from typing import Any
2
3
  from typing import Dict
3
4
  from typing import List
4
5
 
@@ -6,8 +7,11 @@ import boto3
6
7
  import neo4j
7
8
  from botocore.exceptions import ClientError
8
9
 
10
+ from cartography.client.core.tx import load
11
+ from cartography.client.core.tx import read_list_of_values_tx
12
+ from cartography.graph.job import GraphJob
13
+ from cartography.models.aws.ec2.snapshots import EBSSnapshotSchema
9
14
  from cartography.util import aws_handle_regions
10
- from cartography.util import run_cleanup_job
11
15
  from cartography.util import timeit
12
16
 
13
17
  logger = logging.getLogger(__name__)
@@ -24,12 +28,13 @@ def get_snapshots_in_use(
24
28
  WHERE v.region = $Region
25
29
  RETURN v.snapshotid as snapshot
26
30
  """
27
- results = neo4j_session.run(
31
+ results = read_list_of_values_tx(
32
+ neo4j_session,
28
33
  query,
29
34
  AWS_ACCOUNT_ID=current_aws_account_id,
30
35
  Region=region,
31
36
  )
32
- return [r["snapshot"] for r in results if r["snapshot"]]
37
+ return [str(snapshot) for snapshot in results if snapshot]
33
38
 
34
39
 
35
40
  @timeit
@@ -45,7 +50,6 @@ def get_snapshots(
45
50
  for page in paginator.paginate(OwnerIds=["self"]):
46
51
  snapshots.extend(page["Snapshots"])
47
52
 
48
- # fetch in-use snapshots not in self_owned snapshots
49
53
  self_owned_snapshot_ids = {s["SnapshotId"] for s in snapshots}
50
54
  other_snapshot_ids = set(in_use_snapshot_ids) - self_owned_snapshot_ids
51
55
  if other_snapshot_ids:
@@ -55,8 +59,7 @@ def get_snapshots(
55
59
  except ClientError as e:
56
60
  if e.response["Error"]["Code"] == "InvalidSnapshot.NotFound":
57
61
  logger.warning(
58
- f"Failed to retrieve page of in-use, \
59
- not owned snapshots. Continuing anyway. Error - {e}",
62
+ f"Failed to retrieve page of in-use, not owned snapshots. Continuing anyway. Error - {e}"
60
63
  )
61
64
  else:
62
65
  raise
@@ -64,93 +67,53 @@ def get_snapshots(
64
67
  return snapshots
65
68
 
66
69
 
70
+ def transform_snapshots(snapshots: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
71
+ transformed: List[Dict[str, Any]] = []
72
+ for snap in snapshots:
73
+ transformed.append(
74
+ {
75
+ "SnapshotId": snap["SnapshotId"],
76
+ "Description": snap.get("Description"),
77
+ "Encrypted": snap.get("Encrypted"),
78
+ "Progress": snap.get("Progress"),
79
+ "StartTime": snap.get("StartTime"),
80
+ "State": snap.get("State"),
81
+ "StateMessage": snap.get("StateMessage"),
82
+ "VolumeId": snap.get("VolumeId"),
83
+ "VolumeSize": snap.get("VolumeSize"),
84
+ "OutpostArn": snap.get("OutpostArn"),
85
+ "DataEncryptionKeyId": snap.get("DataEncryptionKeyId"),
86
+ "KmsKeyId": snap.get("KmsKeyId"),
87
+ }
88
+ )
89
+ return transformed
90
+
91
+
67
92
  @timeit
68
93
  def load_snapshots(
69
94
  neo4j_session: neo4j.Session,
70
- data: List[Dict],
95
+ data: List[Dict[str, Any]],
71
96
  region: str,
72
97
  current_aws_account_id: str,
73
98
  update_tag: int,
74
99
  ) -> None:
75
- ingest_snapshots = """
76
- UNWIND $snapshots_list as snapshot
77
- MERGE (s:EBSSnapshot{id: snapshot.SnapshotId})
78
- ON CREATE SET s.firstseen = timestamp()
79
- SET s.lastupdated = $update_tag, s.description = snapshot.Description, s.encrypted = snapshot.Encrypted,
80
- s.progress = snapshot.Progress, s.starttime = snapshot.StartTime, s.state = snapshot.State,
81
- s.statemessage = snapshot.StateMessage, s.volumeid = snapshot.VolumeId, s.volumesize = snapshot.VolumeSize,
82
- s.outpostarn = snapshot.OutpostArn, s.dataencryptionkeyid = snapshot.DataEncryptionKeyId,
83
- s.kmskeyid = snapshot.KmsKeyId, s.region=$Region
84
- WITH s
85
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
86
- MERGE (aa)-[r:RESOURCE]->(s)
87
- ON CREATE SET r.firstseen = timestamp()
88
- SET r.lastupdated = $update_tag
89
- """
90
-
91
- for snapshot in data:
92
- snapshot["StartTime"] = str(snapshot["StartTime"])
93
-
94
- neo4j_session.run(
95
- ingest_snapshots,
96
- snapshots_list=data,
97
- AWS_ACCOUNT_ID=current_aws_account_id,
100
+ load(
101
+ neo4j_session,
102
+ EBSSnapshotSchema(),
103
+ data,
104
+ lastupdated=update_tag,
98
105
  Region=region,
99
- update_tag=update_tag,
100
- )
101
-
102
-
103
- @timeit
104
- def get_snapshot_volumes(snapshots: List[Dict]) -> List[Dict]:
105
- snapshot_volumes: List[Dict] = []
106
- for snapshot in snapshots:
107
- if snapshot.get("VolumeId"):
108
- snapshot_volumes.append(snapshot)
109
-
110
- return snapshot_volumes
111
-
112
-
113
- @timeit
114
- def load_snapshot_volume_relations(
115
- neo4j_session: neo4j.Session,
116
- data: List[Dict],
117
- current_aws_account_id: str,
118
- update_tag: int,
119
- ) -> None:
120
- ingest_volumes = """
121
- UNWIND $snapshot_volumes_list as volume
122
- MERGE (v:EBSVolume{id: volume.VolumeId})
123
- ON CREATE SET v.firstseen = timestamp()
124
- SET v.lastupdated = $update_tag, v.snapshotid = volume.SnapshotId
125
- WITH v, volume
126
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
127
- MERGE (aa)-[r:RESOURCE]->(v)
128
- ON CREATE SET r.firstseen = timestamp()
129
- SET r.lastupdated = $update_tag
130
- WITH v, volume
131
- MATCH (s:EBSSnapshot{id: volume.SnapshotId})
132
- MERGE (s)-[r:CREATED_FROM]->(v)
133
- ON CREATE SET r.firstseen = timestamp()
134
- SET r.lastupdated = $update_tag
135
- """
136
-
137
- neo4j_session.run(
138
- ingest_volumes,
139
- snapshot_volumes_list=data,
140
- AWS_ACCOUNT_ID=current_aws_account_id,
141
- update_tag=update_tag,
106
+ AWS_ID=current_aws_account_id,
142
107
  )
143
108
 
144
109
 
145
110
  @timeit
146
111
  def cleanup_snapshots(
147
112
  neo4j_session: neo4j.Session,
148
- common_job_parameters: Dict,
113
+ common_job_parameters: Dict[str, Any],
149
114
  ) -> None:
150
- run_cleanup_job(
151
- "aws_import_snapshots_cleanup.json",
152
- neo4j_session,
153
- common_job_parameters,
115
+ GraphJob.from_node_schema(EBSSnapshotSchema(), common_job_parameters).run(
116
+ neo4j_session
154
117
  )
155
118
 
156
119
 
@@ -161,7 +124,7 @@ def sync_ebs_snapshots(
161
124
  regions: List[str],
162
125
  current_aws_account_id: str,
163
126
  update_tag: int,
164
- common_job_parameters: Dict,
127
+ common_job_parameters: Dict[str, Any],
165
128
  ) -> None:
166
129
  for region in regions:
167
130
  logger.debug(
@@ -174,12 +137,12 @@ def sync_ebs_snapshots(
174
137
  region,
175
138
  current_aws_account_id,
176
139
  )
177
- data = get_snapshots(boto3_session, region, snapshots_in_use)
178
- load_snapshots(neo4j_session, data, region, current_aws_account_id, update_tag)
179
- snapshot_volumes = get_snapshot_volumes(data)
180
- load_snapshot_volume_relations(
140
+ raw_data = get_snapshots(boto3_session, region, snapshots_in_use)
141
+ transformed_data = transform_snapshots(raw_data)
142
+ load_snapshots(
181
143
  neo4j_session,
182
- snapshot_volumes,
144
+ transformed_data,
145
+ region,
183
146
  current_aws_account_id,
184
147
  update_tag,
185
148
  )
@@ -1,17 +1,17 @@
1
1
  import logging
2
- from typing import Dict
3
- from typing import List
2
+ from typing import Any
4
3
 
5
4
  import boto3
6
5
  import neo4j
7
6
 
7
+ from cartography.client.core.tx import load
8
8
  from cartography.graph.job import GraphJob
9
9
  from cartography.models.aws.ec2.auto_scaling_groups import (
10
10
  EC2SubnetAutoScalingGroupSchema,
11
11
  )
12
12
  from cartography.models.aws.ec2.subnet_instance import EC2SubnetInstanceSchema
13
+ from cartography.models.aws.ec2.subnets import EC2SubnetSchema
13
14
  from cartography.util import aws_handle_regions
14
- from cartography.util import run_cleanup_job
15
15
  from cartography.util import timeit
16
16
 
17
17
  from .util import get_botocore_config
@@ -21,86 +21,53 @@ logger = logging.getLogger(__name__)
21
21
 
22
22
  @timeit
23
23
  @aws_handle_regions
24
- def get_subnet_data(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
24
+ def get_subnet_data(
25
+ boto3_session: boto3.session.Session, region: str
26
+ ) -> list[dict[str, Any]]:
25
27
  client = boto3_session.client(
26
28
  "ec2",
27
29
  region_name=region,
28
30
  config=get_botocore_config(),
29
31
  )
30
32
  paginator = client.get_paginator("describe_subnets")
31
- subnets: List[Dict] = []
33
+ subnets: list[dict[str, Any]] = []
32
34
  for page in paginator.paginate():
33
35
  subnets.extend(page["Subnets"])
34
36
  return subnets
35
37
 
36
38
 
39
+ def transform_subnet_data(subnets: list[dict[str, Any]]) -> list[dict[str, Any]]:
40
+ """Transform subnet data into a loadable format."""
41
+ transformed: list[dict[str, Any]] = []
42
+ for subnet in subnets:
43
+ transformed.append(subnet.copy())
44
+ return transformed
45
+
46
+
37
47
  @timeit
38
48
  def load_subnets(
39
49
  neo4j_session: neo4j.Session,
40
- data: List[Dict],
50
+ data: list[dict[str, Any]],
41
51
  region: str,
42
52
  aws_account_id: str,
43
53
  aws_update_tag: int,
44
54
  ) -> None:
45
-
46
- ingest_subnets = """
47
- UNWIND $subnets as subnet
48
- MERGE (snet:EC2Subnet{subnetid: subnet.SubnetId})
49
- ON CREATE SET snet.firstseen = timestamp()
50
- SET snet.lastupdated = $aws_update_tag, snet.name = subnet.CidrBlock, snet.cidr_block = subnet.CidrBlock,
51
- snet.available_ip_address_count = subnet.AvailableIpAddressCount, snet.default_for_az = subnet.DefaultForAz,
52
- snet.map_customer_owned_ip_on_launch = subnet.MapCustomerOwnedIpOnLaunch,
53
- snet.state = subnet.State, snet.assignipv6addressoncreation = subnet.AssignIpv6AddressOnCreation,
54
- snet.map_public_ip_on_launch = subnet.MapPublicIpOnLaunch, snet.subnet_arn = subnet.SubnetArn,
55
- snet.availability_zone = subnet.AvailabilityZone, snet.availability_zone_id = subnet.AvailabilityZoneId,
56
- snet.subnetid = subnet.SubnetId
57
- """
58
-
59
- ingest_subnet_vpc_relations = """
60
- UNWIND $subnets as subnet
61
- MATCH (snet:EC2Subnet{subnetid: subnet.SubnetId}), (vpc:AWSVpc{id: subnet.VpcId})
62
- MERGE (snet)-[r:MEMBER_OF_AWS_VPC]->(vpc)
63
- ON CREATE SET r.firstseen = timestamp()
64
- SET r.lastupdated = $aws_update_tag
65
- """
66
-
67
- ingest_subnet_aws_account_relations = """
68
- UNWIND $subnets as subnet
69
- MATCH (snet:EC2Subnet{subnetid: subnet.SubnetId}), (aws:AWSAccount{id: $aws_account_id})
70
- MERGE (aws)-[r:RESOURCE]->(snet)
71
- ON CREATE SET r.firstseen = timestamp()
72
- SET r.lastupdated = $aws_update_tag
73
- """
74
-
75
- neo4j_session.run(
76
- ingest_subnets,
77
- subnets=data,
78
- aws_update_tag=aws_update_tag,
79
- region=region,
80
- aws_account_id=aws_account_id,
81
- )
82
- neo4j_session.run(
83
- ingest_subnet_vpc_relations,
84
- subnets=data,
85
- aws_update_tag=aws_update_tag,
86
- region=region,
87
- aws_account_id=aws_account_id,
88
- )
89
- neo4j_session.run(
90
- ingest_subnet_aws_account_relations,
91
- subnets=data,
92
- aws_update_tag=aws_update_tag,
93
- region=region,
94
- aws_account_id=aws_account_id,
55
+ load(
56
+ neo4j_session,
57
+ EC2SubnetSchema(),
58
+ data,
59
+ Region=region,
60
+ AWS_ID=aws_account_id,
61
+ lastupdated=aws_update_tag,
95
62
  )
96
63
 
97
64
 
98
65
  @timeit
99
- def cleanup_subnets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
100
- run_cleanup_job(
101
- "aws_ingest_subnets_cleanup.json",
66
+ def cleanup_subnets(
67
+ neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
68
+ ) -> None:
69
+ GraphJob.from_node_schema(EC2SubnetSchema(), common_job_parameters).run(
102
70
  neo4j_session,
103
- common_job_parameters,
104
71
  )
105
72
  GraphJob.from_node_schema(EC2SubnetInstanceSchema(), common_job_parameters).run(
106
73
  neo4j_session,
@@ -115,10 +82,10 @@ def cleanup_subnets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -
115
82
  def sync_subnets(
116
83
  neo4j_session: neo4j.Session,
117
84
  boto3_session: boto3.session.Session,
118
- regions: List[str],
85
+ regions: list[str],
119
86
  current_aws_account_id: str,
120
87
  update_tag: int,
121
- common_job_parameters: Dict,
88
+ common_job_parameters: dict[str, Any],
122
89
  ) -> None:
123
90
  for region in regions:
124
91
  logger.info(
@@ -127,5 +94,12 @@ def sync_subnets(
127
94
  current_aws_account_id,
128
95
  )
129
96
  data = get_subnet_data(boto3_session, region)
130
- load_subnets(neo4j_session, data, region, current_aws_account_id, update_tag)
97
+ transformed = transform_subnet_data(data)
98
+ load_subnets(
99
+ neo4j_session,
100
+ transformed,
101
+ region,
102
+ current_aws_account_id,
103
+ update_tag,
104
+ )
131
105
  cleanup_subnets(neo4j_session, common_job_parameters)
@@ -6,9 +6,12 @@ from typing import List
6
6
  import boto3
7
7
  import neo4j
8
8
 
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.models.aws.ecr.image import ECRImageSchema
12
+ from cartography.models.aws.ecr.repository import ECRRepositorySchema
13
+ from cartography.models.aws.ecr.repository_image import ECRRepositoryImageSchema
9
14
  from cartography.util import aws_handle_regions
10
- from cartography.util import batch
11
- from cartography.util import run_cleanup_job
12
15
  from cartography.util import timeit
13
16
  from cartography.util import to_asynchronous
14
17
  from cartography.util import to_synchronous
@@ -74,33 +77,17 @@ def load_ecr_repositories(
74
77
  current_aws_account_id: str,
75
78
  aws_update_tag: int,
76
79
  ) -> None:
77
- query = """
78
- UNWIND $Repositories as ecr_repo
79
- MERGE (repo:ECRRepository{id: ecr_repo.repositoryArn})
80
- ON CREATE SET repo.firstseen = timestamp(),
81
- repo.arn = ecr_repo.repositoryArn,
82
- repo.name = ecr_repo.repositoryName,
83
- repo.region = $Region,
84
- repo.created_at = ecr_repo.createdAt
85
- SET repo.lastupdated = $aws_update_tag,
86
- repo.uri = ecr_repo.repositoryUri
87
- WITH repo
88
-
89
- MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
90
- MERGE (owner)-[r:RESOURCE]->(repo)
91
- ON CREATE SET r.firstseen = timestamp()
92
- SET r.lastupdated = $aws_update_tag
93
- """
94
80
  logger.info(
95
81
  f"Loading {len(repos)} ECR repositories for region {region} into graph.",
96
82
  )
97
- neo4j_session.run(
98
- query,
99
- Repositories=repos,
83
+ load(
84
+ neo4j_session,
85
+ ECRRepositorySchema(),
86
+ repos,
87
+ lastupdated=aws_update_tag,
100
88
  Region=region,
101
- aws_update_tag=aws_update_tag,
102
- AWS_ACCOUNT_ID=current_aws_account_id,
103
- ).consume() # See issue #440
89
+ AWS_ID=current_aws_account_id,
90
+ )
104
91
 
105
92
 
106
93
  @timeit
@@ -114,8 +101,13 @@ def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
114
101
  for repo_uri in sorted(repo_data.keys()):
115
102
  repo_images = repo_data[repo_uri]
116
103
  for img in repo_images:
117
- if "imageDigest" in img and img["imageDigest"]:
104
+ digest = img.get("imageDigest")
105
+ if digest:
106
+ tag = img.get("imageTag")
107
+ uri = repo_uri + (f":{tag}" if tag else "")
118
108
  img["repo_uri"] = repo_uri
109
+ img["uri"] = uri
110
+ img["id"] = uri
119
111
  repo_images_list.append(img)
120
112
  else:
121
113
  logger.warning(
@@ -127,74 +119,51 @@ def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
127
119
  return repo_images_list
128
120
 
129
121
 
130
- def _load_ecr_repo_img_tx(
131
- tx: neo4j.Transaction,
132
- repo_images_list: List[Dict],
133
- aws_update_tag: int,
134
- region: str,
135
- ) -> None:
136
- query = """
137
- UNWIND $RepoList as repo_img
138
- MERGE (ri:ECRRepositoryImage{id: repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, '')})
139
- ON CREATE SET ri.firstseen = timestamp()
140
- SET ri.lastupdated = $aws_update_tag,
141
- ri.tag = repo_img.imageTag,
142
- ri.uri = repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, ''),
143
- ri.image_size_bytes = repo_img.imageSizeInBytes,
144
- ri.image_pushed_at = repo_img.imagePushedAt,
145
- ri.image_manifest_media_type = repo_img.imageManifestMediaType,
146
- ri.artifact_media_type = repo_img.artifactMediaType,
147
- ri.last_recorded_pull_time = repo_img.lastRecordedPullTime
148
- WITH ri, repo_img
149
-
150
- MERGE (img:ECRImage{id: repo_img.imageDigest})
151
- ON CREATE SET img.firstseen = timestamp(),
152
- img.digest = repo_img.imageDigest
153
- SET img.lastupdated = $aws_update_tag,
154
- img.region = $Region
155
- WITH ri, img, repo_img
156
-
157
- MERGE (ri)-[r1:IMAGE]->(img)
158
- ON CREATE SET r1.firstseen = timestamp()
159
- SET r1.lastupdated = $aws_update_tag
160
- WITH ri, repo_img
161
-
162
- MATCH (repo:ECRRepository{uri: repo_img.repo_uri})
163
- MERGE (repo)-[r2:REPO_IMAGE]->(ri)
164
- ON CREATE SET r2.firstseen = timestamp()
165
- SET r2.lastupdated = $aws_update_tag
166
- """
167
- tx.run(
168
- query,
169
- RepoList=repo_images_list,
170
- Region=region,
171
- aws_update_tag=aws_update_tag,
172
- )
173
-
174
-
175
122
  @timeit
176
123
  def load_ecr_repository_images(
177
124
  neo4j_session: neo4j.Session,
178
125
  repo_images_list: List[Dict],
179
126
  region: str,
127
+ current_aws_account_id: str,
180
128
  aws_update_tag: int,
181
129
  ) -> None:
182
130
  logger.info(
183
131
  f"Loading {len(repo_images_list)} ECR repository images in {region} into graph.",
184
132
  )
185
- for repo_image_batch in batch(repo_images_list, size=10000):
186
- neo4j_session.write_transaction(
187
- _load_ecr_repo_img_tx,
188
- repo_image_batch,
189
- aws_update_tag,
190
- region,
191
- )
133
+ image_digests = {img["imageDigest"] for img in repo_images_list}
134
+ ecr_images = [{"imageDigest": d} for d in image_digests]
135
+
136
+ load(
137
+ neo4j_session,
138
+ ECRImageSchema(),
139
+ ecr_images,
140
+ lastupdated=aws_update_tag,
141
+ Region=region,
142
+ AWS_ID=current_aws_account_id,
143
+ )
144
+
145
+ load(
146
+ neo4j_session,
147
+ ECRRepositoryImageSchema(),
148
+ repo_images_list,
149
+ lastupdated=aws_update_tag,
150
+ Region=region,
151
+ AWS_ID=current_aws_account_id,
152
+ )
192
153
 
193
154
 
194
155
  @timeit
195
156
  def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
196
157
  logger.debug("Running ECR cleanup job.")
197
- run_cleanup_job("aws_import_ecr_cleanup.json", neo4j_session, common_job_parameters)
158
+ GraphJob.from_node_schema(ECRRepositorySchema(), common_job_parameters).run(
159
+ neo4j_session
160
+ )
161
+ GraphJob.from_node_schema(ECRRepositoryImageSchema(), common_job_parameters).run(
162
+ neo4j_session
163
+ )
164
+ GraphJob.from_node_schema(ECRImageSchema(), common_job_parameters).run(
165
+ neo4j_session
166
+ )
198
167
 
199
168
 
200
169
  def _get_image_data(
@@ -251,5 +220,11 @@ def sync(
251
220
  update_tag,
252
221
  )
253
222
  repo_images_list = transform_ecr_repository_images(image_data)
254
- load_ecr_repository_images(neo4j_session, repo_images_list, region, update_tag)
223
+ load_ecr_repository_images(
224
+ neo4j_session,
225
+ repo_images_list,
226
+ region,
227
+ current_aws_account_id,
228
+ update_tag,
229
+ )
255
230
  cleanup(neo4j_session, common_job_parameters)
@@ -169,6 +169,22 @@ def _get_containers_from_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, An
169
169
  return containers
170
170
 
171
171
 
172
+ def transform_ecs_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
173
+ """
174
+ Extract network interface ID from task attachments.
175
+ """
176
+ for task in tasks:
177
+ for attachment in task.get("attachments", []):
178
+ if attachment.get("type") == "ElasticNetworkInterface":
179
+ details = attachment.get("details", [])
180
+ for detail in details:
181
+ if detail.get("name") == "networkInterfaceId":
182
+ task["networkInterfaceId"] = detail.get("value")
183
+ break
184
+ break
185
+ return tasks
186
+
187
+
172
188
  @timeit
173
189
  def load_ecs_clusters(
174
190
  neo4j_session: neo4j.Session,
@@ -407,6 +423,7 @@ def _sync_ecs_task_and_container_defns(
407
423
  boto3_session,
408
424
  region,
409
425
  )
426
+ tasks = transform_ecs_tasks(tasks)
410
427
  containers = _get_containers_from_tasks(tasks)
411
428
  load_ecs_tasks(
412
429
  neo4j_session,