cartography 0.99.0rc1__py3-none-any.whl → 0.100.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

cartography/_version.py CHANGED
@@ -1,8 +1,13 @@
1
- # file generated by setuptools_scm
1
+ # file generated by setuptools-scm
2
2
  # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
3
6
  TYPE_CHECKING = False
4
7
  if TYPE_CHECKING:
5
- from typing import Tuple, Union
8
+ from typing import Tuple
9
+ from typing import Union
10
+
6
11
  VERSION_TUPLE = Tuple[Union[int, str], ...]
7
12
  else:
8
13
  VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
12
17
  __version_tuple__: VERSION_TUPLE
13
18
  version_tuple: VERSION_TUPLE
14
19
 
15
- __version__ = version = '0.99.0rc1'
16
- __version_tuple__ = version_tuple = (0, 99, 0)
20
+ __version__ = version = '0.100.0rc2'
21
+ __version_tuple__ = version_tuple = (0, 100, 0)
cartography/cli.py CHANGED
@@ -439,9 +439,10 @@ class CLI:
439
439
  '--gsuite-auth-method',
440
440
  type=str,
441
441
  default='delegated',
442
- choices=['delegated', 'oauth'],
442
+ choices=['delegated', 'oauth', 'default'],
443
443
  help=(
444
- 'The method used by GSuite to authenticate. delegated is the legacy one.'
444
+ 'GSuite authentication method. Can be "delegated" for service account or "oauth" for OAuth. '
445
+ '"Default" best if using gcloud CLI.'
445
446
  ),
446
447
  )
447
448
  parser.add_argument(
@@ -12,8 +12,13 @@ import neo4j
12
12
  from botocore.exceptions import ClientError
13
13
  from policyuniverse.policy import Policy
14
14
 
15
+ from cartography.client.core.tx import load
16
+ from cartography.graph.job import GraphJob
17
+ from cartography.models.aws.apigateway import APIGatewayRestAPISchema
18
+ from cartography.models.aws.apigatewaycertificate import APIGatewayClientCertificateSchema
19
+ from cartography.models.aws.apigatewayresource import APIGatewayResourceSchema
20
+ from cartography.models.aws.apigatewaystage import APIGatewayStageSchema
15
21
  from cartography.util import aws_handle_regions
16
- from cartography.util import run_cleanup_job
17
22
  from cartography.util import timeit
18
23
 
19
24
  logger = logging.getLogger(__name__)
@@ -107,222 +112,146 @@ def get_rest_api_policy(api: Dict, client: botocore.client.BaseClient) -> Any:
107
112
  return policy
108
113
 
109
114
 
110
- @timeit
111
- def load_apigateway_rest_apis(
112
- neo4j_session: neo4j.Session, rest_apis: List[Dict], region: str, current_aws_account_id: str,
113
- aws_update_tag: int,
114
- ) -> None:
115
- """
116
- Ingest the details of API Gateway REST APIs into neo4j.
115
+ def transform_apigateway_rest_apis(
116
+ rest_apis: List[Dict], resource_policies: List[Dict], region: str, current_aws_account_id: str, aws_update_tag: int,
117
+ ) -> List[Dict]:
117
118
  """
118
- ingest_rest_apis = """
119
- UNWIND $rest_apis_list AS r
120
- MERGE (rest_api:APIGatewayRestAPI{id:r.id})
121
- ON CREATE SET rest_api.firstseen = timestamp(),
122
- rest_api.createddate = r.createdDate
123
- SET rest_api.version = r.version,
124
- rest_api.minimumcompressionsize = r.minimumCompressionSize,
125
- rest_api.disableexecuteapiendpoint = r.disableExecuteApiEndpoint,
126
- rest_api.lastupdated = $aws_update_tag,
127
- rest_api.region = $Region
128
- WITH rest_api
129
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
130
- MERGE (aa)-[r:RESOURCE]->(rest_api)
131
- ON CREATE SET r.firstseen = timestamp()
132
- SET r.lastupdated = $aws_update_tag
119
+ Transform API Gateway REST API data for ingestion, including policy analysis
133
120
  """
121
+ # Create a mapping of api_id to policy data for easier lookup
122
+ policy_map = {
123
+ policy['api_id']: policy
124
+ for policy in resource_policies
125
+ }
134
126
 
135
- # neo4j does not accept datetime objects and values. This loop is used to convert
136
- # these values to string.
127
+ transformed_apis = []
137
128
  for api in rest_apis:
138
- api['createdDate'] = str(api['createdDate']) if 'createdDate' in api else None
139
-
140
- neo4j_session.run(
141
- ingest_rest_apis,
142
- rest_apis_list=rest_apis,
143
- aws_update_tag=aws_update_tag,
144
- Region=region,
145
- AWS_ACCOUNT_ID=current_aws_account_id,
146
- )
129
+ policy_data = policy_map.get(api['id'], {})
130
+ transformed_api = {
131
+ 'id': api['id'],
132
+ 'createdDate': str(api['createdDate']) if 'createdDate' in api else None,
133
+ 'version': api.get('version'),
134
+ 'minimumCompressionSize': api.get('minimumCompressionSize'),
135
+ 'disableExecuteApiEndpoint': api.get('disableExecuteApiEndpoint'),
136
+ # Set defaults in the transform function
137
+ 'anonymous_access': policy_data.get('internet_accessible', False),
138
+ 'anonymous_actions': policy_data.get('accessible_actions', []),
139
+ # TODO Issue #1452: clarify internet exposure vs anonymous access
140
+ }
141
+ transformed_apis.append(transformed_api)
142
+
143
+ return transformed_apis
147
144
 
148
145
 
149
146
  @timeit
150
- def _load_apigateway_policies(
151
- neo4j_session: neo4j.Session, policies: List, update_tag: int,
147
+ def load_apigateway_rest_apis(
148
+ neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str,
149
+ aws_update_tag: int,
152
150
  ) -> None:
153
151
  """
154
- Ingest API Gateway REST API policy results into neo4j.
155
- """
156
- ingest_policies = """
157
- UNWIND $policies as policy
158
- MATCH (r:APIGatewayRestAPI) where r.name = policy.api_id
159
- SET r.anonymous_access = (coalesce(r.anonymous_access, false) OR policy.internet_accessible),
160
- r.anonymous_actions = coalesce(r.anonymous_actions, []) + policy.accessible_actions,
161
- r.lastupdated = $UpdateTag
152
+ Ingest API Gateway REST API data into neo4j.
162
153
  """
163
-
164
- neo4j_session.run(
165
- ingest_policies,
166
- policies=policies,
167
- UpdateTag=update_tag,
168
- )
169
-
170
-
171
- def _set_default_values(neo4j_session: neo4j.Session, aws_account_id: str) -> None:
172
- set_defaults = """
173
- MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(restApi:APIGatewayRestAPI)
174
- where restApi.anonymous_actions IS NULL
175
- SET restApi.anonymous_access = false, restApi.anonymous_actions = []
176
- """
177
-
178
- neo4j_session.run(
179
- set_defaults,
180
- AWS_ID=aws_account_id,
154
+ load(
155
+ neo4j_session,
156
+ APIGatewayRestAPISchema(),
157
+ data,
158
+ region=region,
159
+ lastupdated=aws_update_tag,
160
+ AWS_ID=current_aws_account_id,
181
161
  )
182
162
 
183
163
 
184
- @timeit
185
- def _load_apigateway_stages(
186
- neo4j_session: neo4j.Session, stages: List, update_tag: int,
187
- ) -> None:
164
+ def transform_apigateway_stages(stages: List[Dict], update_tag: int) -> List[Dict]:
188
165
  """
189
- Ingest the Stage resource details into neo4j.
166
+ Transform API Gateway Stage data for ingestion
190
167
  """
191
- ingest_stages = """
192
- UNWIND $stages_list AS stage
193
- MERGE (s:APIGatewayStage{id: stage.arn})
194
- ON CREATE SET s.firstseen = timestamp(), s.stagename = stage.stageName,
195
- s.createddate = stage.createdDate
196
- SET s.deploymentid = stage.deploymentId,
197
- s.clientcertificateid = stage.clientCertificateId,
198
- s.cacheclusterenabled = stage.cacheClusterEnabled,
199
- s.cacheclusterstatus = stage.cacheClusterStatus,
200
- s.tracingenabled = stage.tracingEnabled,
201
- s.webaclarn = stage.webAclArn,
202
- s.lastupdated = $UpdateTag
203
- WITH s, stage
204
- MATCH (rest_api:APIGatewayRestAPI{id: stage.apiId})
205
- MERGE (rest_api)-[r:ASSOCIATED_WITH]->(s)
206
- ON CREATE SET r.firstseen = timestamp()
207
- SET r.lastupdated = $UpdateTag
208
- """
209
-
210
- # neo4j does not accept datetime objects and values. This loop is used to convert
211
- # these values to string.
168
+ stage_data = []
212
169
  for stage in stages:
213
170
  stage['createdDate'] = str(stage['createdDate'])
214
- stage['arn'] = "arn:aws:apigateway:::" + stage['apiId'] + "/" + stage['stageName']
215
-
216
- neo4j_session.run(
217
- ingest_stages,
218
- stages_list=stages,
219
- UpdateTag=update_tag,
220
- )
171
+ stage['arn'] = f"arn:aws:apigateway:::{stage['apiId']}/{stage['stageName']}"
172
+ stage_data.append(stage)
173
+ return stage_data
221
174
 
222
175
 
223
- @timeit
224
- def _load_apigateway_certificates(
225
- neo4j_session: neo4j.Session, certificates: List, update_tag: int,
226
- ) -> None:
227
- """
228
- Ingest the API Gateway Client Certificate details into neo4j.
176
+ def transform_apigateway_certificates(certificates: List[Dict], update_tag: int) -> List[Dict]:
229
177
  """
230
- ingest_certificates = """
231
- UNWIND $certificates_list as certificate
232
- MERGE (c:APIGatewayClientCertificate{id: certificate.clientCertificateId})
233
- ON CREATE SET c.firstseen = timestamp(), c.createddate = certificate.createdDate
234
- SET c.lastupdated = $UpdateTag, c.expirationdate = certificate.expirationDate
235
- WITH c, certificate
236
- MATCH (stage:APIGatewayStage{id: certificate.stageArn})
237
- MERGE (stage)-[r:HAS_CERTIFICATE]->(c)
238
- ON CREATE SET r.firstseen = timestamp()
239
- SET r.lastupdated = $UpdateTag
178
+ Transform API Gateway Client Certificate data for ingestion
240
179
  """
241
-
242
- # neo4j does not accept datetime objects and values. This loop is used to convert
243
- # these values to string.
180
+ cert_data = []
244
181
  for certificate in certificates:
245
182
  certificate['createdDate'] = str(certificate['createdDate'])
246
183
  certificate['expirationDate'] = str(certificate.get('expirationDate'))
247
- certificate['stageArn'] = "arn:aws:apigateway:::" + certificate['apiId'] + "/" + certificate['stageName']
248
-
249
- neo4j_session.run(
250
- ingest_certificates,
251
- certificates_list=certificates,
252
- UpdateTag=update_tag,
253
- )
254
-
255
-
256
- @timeit
257
- def _load_apigateway_resources(
258
- neo4j_session: neo4j.Session, resources: List, update_tag: int,
259
- ) -> None:
260
- """
261
- Ingest the API Gateway Resource details into neo4j.
262
- """
263
- ingest_resources = """
264
- UNWIND $resources_list AS res
265
- MERGE (s:APIGatewayResource{id: res.id})
266
- ON CREATE SET s.firstseen = timestamp()
267
- SET s.path = res.path,
268
- s.pathpart = res.pathPart,
269
- s.parentid = res.parentId,
270
- s.lastupdated =$UpdateTag
271
- WITH s, res
272
- MATCH (rest_api:APIGatewayRestAPI{id: res.apiId})
273
- MERGE (rest_api)-[r:RESOURCE]->(s)
274
- ON CREATE SET r.firstseen = timestamp()
275
- SET r.lastupdated = $UpdateTag
276
- """
277
-
278
- neo4j_session.run(
279
- ingest_resources,
280
- resources_list=resources,
281
- UpdateTag=update_tag,
282
- )
184
+ certificate['stageArn'] = f"arn:aws:apigateway:::{certificate['apiId']}/{certificate['stageName']}"
185
+ cert_data.append(certificate)
186
+ return cert_data
283
187
 
284
188
 
285
- @timeit
286
- def load_rest_api_details(
287
- neo4j_session: neo4j.Session, stages_certificate_resources: List[Tuple[Any, Any, Any, Any, Any]],
288
- aws_account_id: str, update_tag: int,
289
- ) -> None:
189
+ def transform_rest_api_details(
190
+ stages_certificate_resources: List[Tuple[Any, Any, Any, Any, Any]],
191
+ ) -> Tuple[List[Dict], List[Dict], List[Dict]]:
290
192
  """
291
- Create dictionaries for Stages, Client certificates, policies and Resource resources
292
- so we can import them in a single query
193
+ Transform Stage, Client Certificate, and Resource data for ingestion
293
194
  """
294
195
  stages: List[Dict] = []
295
196
  certificates: List[Dict] = []
296
197
  resources: List[Dict] = []
297
- policies: List = []
298
- for api_id, stage, certificate, resource, policy in stages_certificate_resources:
299
- parsed_policy = parse_policy(api_id, policy)
300
- if parsed_policy is not None:
301
- policies.append(parsed_policy)
198
+
199
+ for api_id, stage, certificate, resource, _ in stages_certificate_resources:
302
200
  if len(stage) > 0:
303
201
  for s in stage:
304
202
  s['apiId'] = api_id
203
+ s['createdDate'] = str(s['createdDate'])
204
+ s['arn'] = f"arn:aws:apigateway:::{api_id}/{s['stageName']}"
305
205
  stages.extend(stage)
206
+
207
+ if certificate:
208
+ certificate['apiId'] = api_id
209
+ certificate['createdDate'] = str(certificate['createdDate'])
210
+ certificate['expirationDate'] = str(certificate.get('expirationDate'))
211
+ certificate['stageArn'] = f"arn:aws:apigateway:::{api_id}/{certificate['stageName']}"
212
+ certificates.append(certificate)
213
+
306
214
  if len(resource) > 0:
307
215
  for r in resource:
308
216
  r['apiId'] = api_id
309
217
  resources.extend(resource)
310
- if certificate:
311
- certificate['apiId'] = api_id
312
- certificates.append(certificate)
313
218
 
314
- # cleanup existing properties
315
- run_cleanup_job(
316
- 'aws_apigateway_details.json',
219
+ return stages, certificates, resources
220
+
221
+
222
+ @timeit
223
+ def load_rest_api_details(
224
+ neo4j_session: neo4j.Session, stages_certificate_resources: List[Tuple[Any, Any, Any, Any, Any]],
225
+ aws_account_id: str, update_tag: int,
226
+ ) -> None:
227
+ """
228
+ Transform and load Stage, Client Certificate, and Resource data
229
+ """
230
+ stages, certificates, resources = transform_rest_api_details(stages_certificate_resources)
231
+
232
+ load(
317
233
  neo4j_session,
318
- {'UPDATE_TAG': update_tag, 'AWS_ID': aws_account_id},
234
+ APIGatewayStageSchema(),
235
+ stages,
236
+ lastupdated=update_tag,
237
+ AWS_ID=aws_account_id,
319
238
  )
320
239
 
321
- _load_apigateway_policies(neo4j_session, policies, update_tag)
322
- _load_apigateway_stages(neo4j_session, stages, update_tag)
323
- _load_apigateway_certificates(neo4j_session, certificates, update_tag)
324
- _load_apigateway_resources(neo4j_session, resources, update_tag)
325
- _set_default_values(neo4j_session, aws_account_id)
240
+ load(
241
+ neo4j_session,
242
+ APIGatewayClientCertificateSchema(),
243
+ certificates,
244
+ lastupdated=update_tag,
245
+ AWS_ID=aws_account_id,
246
+ )
247
+
248
+ load(
249
+ neo4j_session,
250
+ APIGatewayResourceSchema(),
251
+ resources,
252
+ lastupdated=update_tag,
253
+ AWS_ID=aws_account_id,
254
+ )
326
255
 
327
256
 
328
257
  @timeit
@@ -353,7 +282,27 @@ def parse_policy(api_id: str, policy: Policy) -> Optional[Dict[Any, Any]]:
353
282
 
354
283
  @timeit
355
284
  def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
356
- run_cleanup_job('aws_import_apigateway_cleanup.json', neo4j_session, common_job_parameters)
285
+ """
286
+ Delete out-of-date API Gateway resources and relationships.
287
+ Order matters - clean up certificates, stages, and resources before cleaning up the REST APIs they connect to.
288
+ """
289
+ logger.info("Running API Gateway cleanup job.")
290
+
291
+ # Clean up certificates first
292
+ cleanup_job = GraphJob.from_node_schema(APIGatewayClientCertificateSchema(), common_job_parameters)
293
+ cleanup_job.run(neo4j_session)
294
+
295
+ # Then stages
296
+ cleanup_job = GraphJob.from_node_schema(APIGatewayStageSchema(), common_job_parameters)
297
+ cleanup_job.run(neo4j_session)
298
+
299
+ # Then resources
300
+ cleanup_job = GraphJob.from_node_schema(APIGatewayResourceSchema(), common_job_parameters)
301
+ cleanup_job.run(neo4j_session)
302
+
303
+ # Finally REST APIs
304
+ cleanup_job = GraphJob.from_node_schema(APIGatewayRestAPISchema(), common_job_parameters)
305
+ cleanup_job.run(neo4j_session)
357
306
 
358
307
 
359
308
  @timeit
@@ -362,9 +311,23 @@ def sync_apigateway_rest_apis(
362
311
  aws_update_tag: int,
363
312
  ) -> None:
364
313
  rest_apis = get_apigateway_rest_apis(boto3_session, region)
365
- load_apigateway_rest_apis(neo4j_session, rest_apis, region, current_aws_account_id, aws_update_tag)
366
-
367
314
  stages_certificate_resources = get_rest_api_details(boto3_session, rest_apis, region)
315
+
316
+ # Extract policies and transform the data
317
+ policies = []
318
+ for api_id, _, _, _, policy in stages_certificate_resources:
319
+ parsed_policy = parse_policy(api_id, policy)
320
+ if parsed_policy is not None:
321
+ policies.append(parsed_policy)
322
+
323
+ transformed_apis = transform_apigateway_rest_apis(
324
+ rest_apis,
325
+ policies,
326
+ region,
327
+ current_aws_account_id,
328
+ aws_update_tag,
329
+ )
330
+ load_apigateway_rest_apis(neo4j_session, transformed_apis, region, current_aws_account_id, aws_update_tag)
368
331
  load_rest_api_details(neo4j_session, stages_certificate_resources, current_aws_account_id, aws_update_tag)
369
332
 
370
333
 
@@ -8,8 +8,8 @@ import neo4j
8
8
  from botocore.exceptions import ClientError
9
9
 
10
10
  from cartography.client.core.tx import load
11
+ from cartography.client.core.tx import read_list_of_values_tx
11
12
  from cartography.graph.job import GraphJob
12
- from cartography.intel.aws.ec2 import get_ec2_regions
13
13
  from cartography.intel.aws.ec2.util import get_botocore_config
14
14
  from cartography.models.aws.ec2.images import EC2ImageSchema
15
15
  from cartography.util import aws_handle_regions
@@ -21,22 +21,26 @@ logger = logging.getLogger(__name__)
21
21
  @timeit
22
22
  def get_images_in_use(neo4j_session: neo4j.Session, region: str, current_aws_account_id: str) -> List[str]:
23
23
  get_images_query = """
24
+ CALL {
24
25
  MATCH (:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(i:EC2Instance)
25
- WHERE i.region = $Region
26
- RETURN DISTINCT(i.imageid) as image
27
- UNION
26
+ WHERE i.region = $Region AND i.imageid IS NOT NULL
27
+ RETURN i.imageid AS image
28
+ UNION ALL
28
29
  MATCH (:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(lc:LaunchConfiguration)
29
- WHERE lc.region = $Region
30
- RETURN DISTINCT(lc.image_id) as image
31
- UNION
30
+ WHERE lc.region = $Region AND lc.image_id IS NOT NULL
31
+ RETURN lc.image_id AS image
32
+ UNION ALL
32
33
  MATCH (:AWSAccount{id: $AWS_ACCOUNT_ID})-[:RESOURCE]->(ltv:LaunchTemplateVersion)
33
- WHERE ltv.region = $Region
34
- RETURN DISTINCT(ltv.image_id) as image
34
+ WHERE ltv.region = $Region AND ltv.image_id IS NOT NULL
35
+ RETURN ltv.image_id AS image
36
+ }
37
+ RETURN DISTINCT image;
35
38
  """
36
- results = neo4j_session.run(get_images_query, AWS_ACCOUNT_ID=current_aws_account_id, Region=region)
37
- images = []
38
- for r in results:
39
- images.append(r['image'])
39
+ result = read_list_of_values_tx(
40
+ neo4j_session, get_images_query,
41
+ AWS_ACCOUNT_ID=current_aws_account_id, Region=region,
42
+ )
43
+ images = [str(image) for image in result]
40
44
  return images
41
45
 
42
46
 
@@ -45,39 +49,23 @@ def get_images_in_use(neo4j_session: neo4j.Session, region: str, current_aws_acc
45
49
  def get_images(boto3_session: boto3.session.Session, region: str, image_ids: List[str]) -> List[Dict]:
46
50
  client = boto3_session.client('ec2', region_name=region, config=get_botocore_config())
47
51
  images = []
52
+ self_images = []
48
53
  try:
49
54
  self_images = client.describe_images(Owners=['self'])['Images']
50
- images.extend(self_images)
51
- except ClientError as e:
52
- logger.warning(f"Failed to retrieve private images for region - {region}. Error - {e}")
53
- try:
54
- if image_ids:
55
- image_ids = [image_id for image_id in image_ids if image_id is not None]
56
- images_in_use = client.describe_images(ImageIds=image_ids)['Images']
57
- # Ensure we're not adding duplicates
58
- _ids = [image["ImageId"] for image in images]
59
- for image in images_in_use:
60
- if image["ImageId"] not in _ids:
61
- images.append(image)
62
- _ids.append(image["ImageId"])
63
- # Handle cross region image ids
64
- if len(_ids) != len(image_ids):
65
- logger.info("Attempting to retrieve images from other regions")
66
- pending_ids = [image_id for image_id in image_ids if image_id not in _ids]
67
- all_regions = get_ec2_regions(boto3_session)
68
- clients = {
69
- other_region: boto3_session.client('ec2', region_name=other_region, config=get_botocore_config())
70
- for other_region in all_regions if other_region != region
71
- }
72
- for other_region, client in clients.items():
73
- for _id in pending_ids:
74
- try:
75
- pending_image = client.describe_images(ImageIds=[_id])['Images']
76
- images.extend(pending_image)
77
- except ClientError as e:
78
- logger.warning(f"Image {id} could not be found at region - {other_region}. Error - {e}")
79
55
  except ClientError as e:
80
- logger.warning(f"Failed to retrieve public images for region - {region}. Error - {e}")
56
+ logger.warning(f"Failed retrieve self owned images for region - {region}. Error - {e}")
57
+ images.extend(self_images)
58
+ if image_ids:
59
+ self_image_ids = {image['ImageId'] for image in images}
60
+ # Go one by one to avoid losing all images if one fails
61
+ for image in image_ids:
62
+ if image in self_image_ids:
63
+ continue
64
+ try:
65
+ public_images = client.describe_images(ImageIds=[image])['Images']
66
+ images.extend(public_images)
67
+ except ClientError as e:
68
+ logger.warning(f"Failed retrieve image id {image} for region - {region}. Error - {e}")
81
69
  return images
82
70
 
83
71
 
@@ -36,7 +36,10 @@ def get_ecr_repository_images(boto3_session: boto3.session.Session, region: str,
36
36
  paginator = client.get_paginator('list_images')
37
37
  ecr_repository_images: List[Dict] = []
38
38
  for page in paginator.paginate(repositoryName=repository_name):
39
- ecr_repository_images.extend(page['imageIds'])
39
+ image_ids = page['imageIds']
40
+ if image_ids:
41
+ response = client.describe_images(repositoryName=repository_name, imageIds=image_ids)
42
+ ecr_repository_images.extend(response['imageDetails'])
40
43
  return ecr_repository_images
41
44
 
42
45
 
@@ -103,7 +106,12 @@ def _load_ecr_repo_img_tx(
103
106
  ON CREATE SET ri.firstseen = timestamp()
104
107
  SET ri.lastupdated = $aws_update_tag,
105
108
  ri.tag = repo_img.imageTag,
106
- ri.uri = repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, '')
109
+ ri.uri = repo_img.repo_uri + COALESCE(":" + repo_img.imageTag, ''),
110
+ ri.image_size_bytes = repo_img.imageSizeInBytes,
111
+ ri.image_pushed_at = repo_img.imagePushedAt,
112
+ ri.image_manifest_media_type = repo_img.imageManifestMediaType,
113
+ ri.artifact_media_type = repo_img.artifactMediaType,
114
+ ri.last_recorded_pull_time = repo_img.lastRecordedPullTime
107
115
  WITH ri, repo_img
108
116
 
109
117
  MERGE (img:ECRImage{id: repo_img.imageDigest})
@@ -17,21 +17,23 @@ from cartography.intel.gcp import compute
17
17
  from cartography.intel.gcp import crm
18
18
  from cartography.intel.gcp import dns
19
19
  from cartography.intel.gcp import gke
20
+ from cartography.intel.gcp import iam
20
21
  from cartography.intel.gcp import storage
21
22
  from cartography.util import run_analysis_job
22
23
  from cartography.util import timeit
23
24
 
24
25
  logger = logging.getLogger(__name__)
25
- Resources = namedtuple('Resources', 'compute container crm_v1 crm_v2 dns storage serviceusage')
26
+ Resources = namedtuple('Resources', 'compute container crm_v1 crm_v2 dns storage serviceusage iam')
26
27
 
27
28
  # Mapping of service short names to their full names as in docs. See https://developers.google.com/apis-explorer,
28
29
  # and https://cloud.google.com/service-usage/docs/reference/rest/v1/services#ServiceConfig
29
- Services = namedtuple('Services', 'compute storage gke dns')
30
+ Services = namedtuple('Services', 'compute storage gke dns iam')
30
31
  service_names = Services(
31
32
  compute='compute.googleapis.com',
32
33
  storage='storage.googleapis.com',
33
34
  gke='container.googleapis.com',
34
35
  dns='dns.googleapis.com',
36
+ iam='iam.googleapis.com',
35
37
  )
36
38
 
37
39
 
@@ -112,6 +114,13 @@ def _get_serviceusage_resource(credentials: GoogleCredentials) -> Resource:
112
114
  return googleapiclient.discovery.build('serviceusage', 'v1', credentials=credentials, cache_discovery=False)
113
115
 
114
116
 
117
+ def _get_iam_resource(credentials: GoogleCredentials) -> Resource:
118
+ """
119
+ Instantiates a Google IAM resource object to call the IAM API.
120
+ """
121
+ return googleapiclient.discovery.build('iam', 'v1', credentials=credentials, cache_discovery=False)
122
+
123
+
115
124
  def _initialize_resources(credentials: GoogleCredentials) -> Resource:
116
125
  """
117
126
  Create namedtuple of all resource objects necessary for GCP data gathering.
@@ -126,6 +135,7 @@ def _initialize_resources(credentials: GoogleCredentials) -> Resource:
126
135
  container=None,
127
136
  dns=None,
128
137
  storage=None,
138
+ iam=_get_iam_resource(credentials),
129
139
  )
130
140
 
131
141
 
@@ -286,6 +296,18 @@ def _sync_multiple_projects(
286
296
  logger.info("Syncing GCP project %s for DNS", project_id)
287
297
  _sync_single_project_dns(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)
288
298
 
299
+ # IAM data sync
300
+ for project in projects:
301
+ project_id = project['projectId']
302
+ logger.info("Syncing GCP project %s for IAM", project_id)
303
+ iam.sync(
304
+ neo4j_session,
305
+ resources.iam,
306
+ project_id,
307
+ gcp_update_tag,
308
+ common_job_parameters,
309
+ )
310
+
289
311
 
290
312
  @timeit
291
313
  def get_gcp_credentials() -> GoogleCredentials: