cartography 0.110.0rc1__py3-none-any.whl → 0.111.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (59) hide show
  1. cartography/_version.py +16 -3
  2. cartography/cli.py +0 -8
  3. cartography/config.py +0 -9
  4. cartography/data/indexes.cypher +0 -2
  5. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  6. cartography/graph/querybuilder.py +70 -0
  7. cartography/intel/aws/apigateway.py +111 -4
  8. cartography/intel/aws/cognito.py +201 -0
  9. cartography/intel/aws/ec2/vpc.py +140 -124
  10. cartography/intel/aws/ecs.py +7 -1
  11. cartography/intel/aws/eventbridge.py +73 -0
  12. cartography/intel/aws/glue.py +64 -0
  13. cartography/intel/aws/kms.py +13 -1
  14. cartography/intel/aws/rds.py +105 -0
  15. cartography/intel/aws/resources.py +2 -0
  16. cartography/intel/aws/route53.py +3 -1
  17. cartography/intel/aws/s3.py +104 -0
  18. cartography/intel/entra/__init__.py +41 -43
  19. cartography/intel/entra/applications.py +2 -1
  20. cartography/intel/entra/ou.py +1 -1
  21. cartography/intel/github/__init__.py +21 -25
  22. cartography/intel/github/repos.py +13 -38
  23. cartography/intel/kubernetes/__init__.py +4 -0
  24. cartography/intel/kubernetes/rbac.py +464 -0
  25. cartography/intel/kubernetes/util.py +17 -0
  26. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  27. cartography/models/aws/cognito/__init__.py +0 -0
  28. cartography/models/aws/cognito/identity_pool.py +70 -0
  29. cartography/models/aws/cognito/user_pool.py +47 -0
  30. cartography/models/aws/ec2/security_groups.py +1 -1
  31. cartography/models/aws/ec2/vpc.py +46 -0
  32. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  33. cartography/models/aws/ecs/services.py +17 -0
  34. cartography/models/aws/ecs/tasks.py +1 -0
  35. cartography/models/aws/eventbridge/target.py +71 -0
  36. cartography/models/aws/glue/job.py +69 -0
  37. cartography/models/aws/rds/event_subscription.py +146 -0
  38. cartography/models/aws/route53/dnsrecord.py +21 -0
  39. cartography/models/github/dependencies.py +1 -2
  40. cartography/models/kubernetes/clusterrolebindings.py +98 -0
  41. cartography/models/kubernetes/clusterroles.py +52 -0
  42. cartography/models/kubernetes/rolebindings.py +119 -0
  43. cartography/models/kubernetes/roles.py +76 -0
  44. cartography/models/kubernetes/serviceaccounts.py +77 -0
  45. cartography/models/tailscale/device.py +1 -0
  46. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/METADATA +3 -3
  47. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/RECORD +57 -43
  48. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  49. cartography/intel/entra/resources.py +0 -20
  50. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  51. /cartography/models/aws/{__init__.py → apigateway/__init__.py} +0 -0
  52. /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
  53. /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
  54. /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
  55. /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
  56. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/WHEEL +0 -0
  57. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/entry_points.txt +0 -0
  58. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/licenses/LICENSE +0 -0
  59. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -1,7 +1,14 @@
1
1
  # file generated by setuptools-scm
2
2
  # don't change, don't track in version control
3
3
 
4
- __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
5
12
 
6
13
  TYPE_CHECKING = False
7
14
  if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
9
16
  from typing import Union
10
17
 
11
18
  VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
12
20
  else:
13
21
  VERSION_TUPLE = object
22
+ COMMIT_ID = object
14
23
 
15
24
  version: str
16
25
  __version__: str
17
26
  __version_tuple__: VERSION_TUPLE
18
27
  version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
19
30
 
20
- __version__ = version = '0.110.0rc1'
21
- __version_tuple__ = version_tuple = (0, 110, 0, 'rc1')
31
+ __version__ = version = '0.111.0rc1'
32
+ __version_tuple__ = version_tuple = (0, 111, 0, 'rc1')
33
+
34
+ __commit_id__ = commit_id = None
cartography/cli.py CHANGED
@@ -254,14 +254,6 @@ class CLI:
254
254
  "The name of environment variable containing Entra Client Secret for Service Principal Authentication."
255
255
  ),
256
256
  )
257
- parser.add_argument(
258
- "--entra-best-effort-mode",
259
- action="store_true",
260
- help=(
261
- "Enable Entra ID sync best effort mode. This will allow cartography to continue "
262
- "syncing other Entra ID entities and delay raising an exception until the end of the sync."
263
- ),
264
- )
265
257
  parser.add_argument(
266
258
  "--aws-requested-syncs",
267
259
  type=str,
cartography/config.py CHANGED
@@ -51,9 +51,6 @@ class Config:
51
51
  :param entra_client_id: Client Id for connecting in a Service Principal Authentication approach. Optional.
52
52
  :type entra_client_secret: str
53
53
  :param entra_client_secret: Client Secret for connecting in a Service Principal Authentication approach. Optional.
54
- :type entra_best_effort_mode: bool
55
- :param entra_best_effort_mode: If True, Entra ID sync will continue on errors and raise an aggregated
56
- exception at the end of the sync. If False (default), exceptions will be raised immediately.
57
54
  :type aws_requested_syncs: str
58
55
  :param aws_requested_syncs: Comma-separated list of AWS resources to sync. Optional.
59
56
  :type aws_guardduty_severity_threshold: str
@@ -167,8 +164,6 @@ class Config:
167
164
  :param sentinelone_api_url: SentinelOne API URL. Optional.
168
165
  :type sentinelone_api_token: string
169
166
  :param sentinelone_api_token: SentinelOne API token for authentication. Optional.
170
- :type sentinelone_api_token_env_var: string
171
- :param sentinelone_api_token_env_var: The name of an environment variable containing the SentinelOne API token. Optional.
172
167
  :type sentinelone_account_ids: list[str]
173
168
  :param sentinelone_account_ids: List of SentinelOne account IDs to sync. Optional.
174
169
  """
@@ -194,7 +189,6 @@ class Config:
194
189
  entra_tenant_id=None,
195
190
  entra_client_id=None,
196
191
  entra_client_secret=None,
197
- entra_best_effort_mode=False,
198
192
  aws_requested_syncs=None,
199
193
  aws_guardduty_severity_threshold=None,
200
194
  analysis_job_directory=None,
@@ -257,7 +251,6 @@ class Config:
257
251
  scaleway_org=None,
258
252
  sentinelone_api_url=None,
259
253
  sentinelone_api_token=None,
260
- sentinelone_api_token_env_var=None,
261
254
  sentinelone_account_ids=None,
262
255
  ):
263
256
  self.neo4j_uri = neo4j_uri
@@ -281,7 +274,6 @@ class Config:
281
274
  self.entra_tenant_id = entra_tenant_id
282
275
  self.entra_client_id = entra_client_id
283
276
  self.entra_client_secret = entra_client_secret
284
- self.entra_best_effort_mode = entra_best_effort_mode
285
277
  self.aws_requested_syncs = aws_requested_syncs
286
278
  self.aws_guardduty_severity_threshold = aws_guardduty_severity_threshold
287
279
  self.analysis_job_directory = analysis_job_directory
@@ -344,5 +336,4 @@ class Config:
344
336
  self.scaleway_org = scaleway_org
345
337
  self.sentinelone_api_url = sentinelone_api_url
346
338
  self.sentinelone_api_token = sentinelone_api_token
347
- self.sentinelone_api_token_env_var = sentinelone_api_token_env_var
348
339
  self.sentinelone_account_ids = sentinelone_account_ids
@@ -51,8 +51,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:AWSTransitGatewayAttachment) ON (n.lastupdated
51
51
  CREATE INDEX IF NOT EXISTS FOR (n:AWSUser) ON (n.arn);
52
52
  CREATE INDEX IF NOT EXISTS FOR (n:AWSUser) ON (n.name);
53
53
  CREATE INDEX IF NOT EXISTS FOR (n:AWSUser) ON (n.lastupdated);
54
- CREATE INDEX IF NOT EXISTS FOR (n:AWSVpc) ON (n.id);
55
- CREATE INDEX IF NOT EXISTS FOR (n:AWSVpc) ON (n.lastupdated);
56
54
  CREATE INDEX IF NOT EXISTS FOR (n:AccountAccessKey) ON (n.accesskeyid);
57
55
  CREATE INDEX IF NOT EXISTS FOR (n:AccountAccessKey) ON (n.lastupdated);
58
56
  CREATE INDEX IF NOT EXISTS FOR (n:AutoScalingGroup) ON (n.arn);
@@ -22,8 +22,8 @@
22
22
  "iterative": false
23
23
  },
24
24
  {
25
- "__comment__": "Attach EC2KeyPairs with matching fingerprints to eachother and set duplicate_keyfingerprint = True",
26
- "query": "MATCH (k1:EC2KeyPair), (k2:EC2KeyPair) WHERE k1.id <> k2.id AND k1.keyfingerprint = k2.keyfingerprint SET k1.duplicate_keyfingerprint = True, k2.duplicate_keyfingerprint = True MERGE (k1)-[r:MATCHING_FINGERPRINT]-(k2) ON CREATE SET r.firstseen = $UPDATE_TAG SET r.lastupdated = $UPDATE_TAG return COUNT(*) as TotalCompleted",
25
+ "__comment__": "Attach EC2KeyPairs with matching fingerprints to each other and set duplicate_keyfingerprint = True. Use id(k1) < id(k2) to avoid Cartesian product warning and ensure O(1) comparison.",
26
+ "query": "MATCH (k1:EC2KeyPair) MATCH (k2:EC2KeyPair) WHERE id(k1) < id(k2) AND k1.keyfingerprint = k2.keyfingerprint SET k1.duplicate_keyfingerprint = True, k2.duplicate_keyfingerprint = True MERGE (k1)-[r:MATCHING_FINGERPRINT]-(k2) ON CREATE SET r.firstseen = $UPDATE_TAG SET r.lastupdated = $UPDATE_TAG RETURN COUNT(*) as TotalCompleted",
27
27
  "iterative": false
28
28
  }
29
29
  ]
@@ -1,5 +1,7 @@
1
1
  import logging
2
2
  from dataclasses import asdict
3
+ from importlib.metadata import PackageNotFoundError
4
+ from importlib.metadata import version
3
5
  from string import Template
4
6
  from typing import Dict
5
7
  from typing import List
@@ -223,6 +225,8 @@ def _build_attach_sub_resource_statement(
223
225
  $RelMergeClause
224
226
  ON CREATE SET r.firstseen = timestamp()
225
227
  SET
228
+ r._module_name = "$module_name",
229
+ r._module_version = "$module_version",
226
230
  $set_rel_properties_statement
227
231
  """,
228
232
  )
@@ -244,6 +248,8 @@ def _build_attach_sub_resource_statement(
244
248
  SubResourceLabel=sub_resource_link.target_node_label,
245
249
  MatchClause=_build_match_clause(sub_resource_link.target_node_matcher),
246
250
  RelMergeClause=rel_merge_clause,
251
+ module_name=_get_module_from_schema(sub_resource_link),
252
+ module_version=_get_cartography_version(),
247
253
  SubResourceRelLabel=sub_resource_link.rel_label,
248
254
  set_rel_properties_statement=_build_rel_properties_statement(
249
255
  "r",
@@ -278,6 +284,8 @@ def _build_attach_additional_links_statement(
278
284
  $RelMerge
279
285
  ON CREATE SET $rel_var.firstseen = timestamp()
280
286
  SET
287
+ $rel_var._module_name = "$module_name",
288
+ $rel_var._module_version = "$module_version",
281
289
  $set_rel_properties_statement
282
290
  """,
283
291
  )
@@ -312,6 +320,8 @@ def _build_attach_additional_links_statement(
312
320
  node_var=node_var,
313
321
  rel_var=rel_var,
314
322
  RelMerge=rel_merge,
323
+ module_name=_get_module_from_schema(link),
324
+ module_version=_get_cartography_version(),
315
325
  set_rel_properties_statement=_build_rel_properties_statement(
316
326
  rel_var,
317
327
  rel_props_as_dict,
@@ -453,6 +463,8 @@ def build_ingestion_query(
453
463
  MERGE (i:$node_label{id: $dict_id_field})
454
464
  ON CREATE SET i.firstseen = timestamp()
455
465
  SET
466
+ i._module_name = "$module_name",
467
+ i._module_version = "$module_version",
456
468
  $set_node_properties_statement
457
469
  $attach_relationships_statement
458
470
  """,
@@ -475,6 +487,8 @@ def build_ingestion_query(
475
487
  ingest_query = query_template.safe_substitute(
476
488
  node_label=node_schema.label,
477
489
  dict_id_field=node_props.id,
490
+ module_name=_get_module_from_schema(node_schema),
491
+ module_version=_get_cartography_version(),
478
492
  set_node_properties_statement=_build_node_properties_statement(
479
493
  node_props_as_dict,
480
494
  node_schema.extra_node_labels,
@@ -650,6 +664,8 @@ def build_matchlink_query(rel_schema: CartographyRelSchema) -> str:
650
664
  MERGE $rel
651
665
  ON CREATE SET r.firstseen = timestamp()
652
666
  SET
667
+ r._module_name = "$module_name",
668
+ r._module_version = "$module_version",
653
669
  $set_rel_properties_statement;
654
670
  """
655
671
  )
@@ -677,8 +693,62 @@ def build_matchlink_query(rel_schema: CartographyRelSchema) -> str:
677
693
  source_match=source_match,
678
694
  target_match=target_match,
679
695
  rel=rel,
696
+ module_name=_get_module_from_schema(rel_schema),
697
+ module_version=_get_cartography_version(),
680
698
  set_rel_properties_statement=_build_rel_properties_statement(
681
699
  "r",
682
700
  rel_props_as_dict,
683
701
  ),
684
702
  )
703
+
704
+
705
+ def _get_cartography_version() -> str:
706
+ """
707
+ Get the current version of the cartography package.
708
+
709
+ This function attempts to retrieve the version of the installed cartography package
710
+ using importlib.metadata. If the package is not found (typically in development
711
+ or testing environments), it returns 'dev' as a fallback.
712
+
713
+ Returns:
714
+ The version string of the cartography package, or 'dev' if not found
715
+ """
716
+ try:
717
+ return version("cartography")
718
+ except PackageNotFoundError:
719
+ # This can occur if the cartography package is not installed in the environment, typically in development or testing environments.
720
+ logger.warning("cartography package not found. Returning 'dev' version.")
721
+ # Fall back to the literal 'dev' version string if the package is not found
722
+ return "dev"
723
+
724
+
725
+ def _get_module_from_schema(
726
+ schema, #: "CartographyNodeSchema" | "CartographyRelSchema",
727
+ ) -> str:
728
+ """
729
+ Extract the module name from a Cartography schema object.
730
+
731
+ This function extracts and formats the module name from a CartographyNodeSchema
732
+ or CartographyRelSchema object. It expects schemas to be part of the official
733
+ cartography.models package hierarchy and returns a formatted string indicating
734
+ the specific cartography module.
735
+
736
+ Args:
737
+ schema: A CartographyNodeSchema or CartographyRelSchema object
738
+
739
+ Returns:
740
+ A formatted module name string in the format 'cartography:<module_name>'
741
+ or 'unknown:<full_module_path>' if the schema is not from cartography.models
742
+ """
743
+ # If the entity schema does not belong to the cartography.models package,
744
+ # we log a warning and return the full module path.
745
+ if not schema.__module__.startswith("cartography.models."):
746
+ logger.warning(
747
+ "The schema %s does not start with 'cartography.models.'. "
748
+ "This may indicate that the schema is not part of the official cartography models.",
749
+ schema.__module__,
750
+ )
751
+ return f"unknown:{schema.__module__}"
752
+ # Otherwise, we return the third dotted component of the module path (the part right after 'cartography.models.'), prefixed with 'cartography:'.
753
+ parts = schema.__module__.split(".")
754
+ return f"cartography:{parts[2]}"
@@ -14,12 +14,18 @@ from policyuniverse.policy import Policy
14
14
 
15
15
  from cartography.client.core.tx import load
16
16
  from cartography.graph.job import GraphJob
17
- from cartography.models.aws.apigateway import APIGatewayRestAPISchema
18
- from cartography.models.aws.apigatewaycertificate import (
17
+ from cartography.intel.aws.ec2.util import get_botocore_config
18
+ from cartography.models.aws.apigateway.apigateway import APIGatewayRestAPISchema
19
+ from cartography.models.aws.apigateway.apigatewaycertificate import (
19
20
  APIGatewayClientCertificateSchema,
20
21
  )
21
- from cartography.models.aws.apigatewayresource import APIGatewayResourceSchema
22
- from cartography.models.aws.apigatewaystage import APIGatewayStageSchema
22
+ from cartography.models.aws.apigateway.apigatewaydeployment import (
23
+ APIGatewayDeploymentSchema,
24
+ )
25
+ from cartography.models.aws.apigateway.apigatewayresource import (
26
+ APIGatewayResourceSchema,
27
+ )
28
+ from cartography.models.aws.apigateway.apigatewaystage import APIGatewayStageSchema
23
29
  from cartography.util import aws_handle_regions
24
30
  from cartography.util import timeit
25
31
 
@@ -40,6 +46,38 @@ def get_apigateway_rest_apis(
40
46
  return apis
41
47
 
42
48
 
49
+ def get_rest_api_ids(
50
+ rest_apis: List[Dict],
51
+ ) -> List[str]:
52
+ """
53
+ Extracts the IDs of the REST APIs from the provided list.
54
+ """
55
+ return [api["id"] for api in rest_apis if "id" in api]
56
+
57
+
58
+ @timeit
59
+ @aws_handle_regions
60
+ def get_rest_api_deployments(
61
+ boto3_session: boto3.session.Session,
62
+ rest_api_ids: List[str],
63
+ region: str,
64
+ ) -> List[Dict[str, Any]]:
65
+ """
66
+ Retrieves the deployments for each REST API in the provided list.
67
+ """
68
+ client = boto3_session.client(
69
+ "apigateway", region_name=region, config=get_botocore_config()
70
+ )
71
+ deployments: List[Dict[str, Any]] = []
72
+ for api_id in rest_api_ids:
73
+ paginator = client.get_paginator("get_deployments")
74
+ for page in paginator.paginate(restApiId=api_id):
75
+ for deployment in page.get("items", []):
76
+ deployment["api_id"] = api_id
77
+ deployments.append(deployment)
78
+ return deployments
79
+
80
+
43
81
  @timeit
44
82
  @aws_handle_regions
45
83
  def get_rest_api_details(
@@ -244,6 +282,25 @@ def transform_rest_api_details(
244
282
  return stages, certificates, resources
245
283
 
246
284
 
285
+ def transform_apigateway_deployments(
286
+ deployments: List[Dict[str, Any]],
287
+ region: str,
288
+ ) -> List[Dict[str, Any]]:
289
+ """
290
+ Transform API Gateway Deployment data for ingestion
291
+ """
292
+ transformed_deployments = []
293
+ for deployment in deployments:
294
+ transformed_deployment = {
295
+ "id": f"{deployment['api_id']}/{deployment['id']}",
296
+ "api_id": deployment["api_id"],
297
+ "description": deployment.get("description"),
298
+ "region": region,
299
+ }
300
+ transformed_deployments.append(transformed_deployment)
301
+ return transformed_deployments
302
+
303
+
247
304
  @timeit
248
305
  def load_rest_api_details(
249
306
  neo4j_session: neo4j.Session,
@@ -283,6 +340,30 @@ def load_rest_api_details(
283
340
  )
284
341
 
285
342
 
343
+ @timeit
344
+ def load_apigateway_deployments(
345
+ neo4j_session: neo4j.Session,
346
+ data: List[Dict[str, Any]],
347
+ region: str,
348
+ current_aws_account_id: str,
349
+ aws_update_tag: int,
350
+ ) -> None:
351
+ """
352
+ Load API Gateway Deployment data into neo4j.
353
+ """
354
+ logger.info(
355
+ f"Loading API Gateway {len(data)} deployments for region '{region}' into graph.",
356
+ )
357
+ load(
358
+ neo4j_session,
359
+ APIGatewayDeploymentSchema(),
360
+ data,
361
+ region=region,
362
+ lastupdated=aws_update_tag,
363
+ AWS_ID=current_aws_account_id,
364
+ )
365
+
366
+
286
367
  @timeit
287
368
  def parse_policy(api_id: str, policy: Policy) -> Optional[Dict[Any, Any]]:
288
369
  """
@@ -345,6 +426,12 @@ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
345
426
  )
346
427
  cleanup_job.run(neo4j_session)
347
428
 
429
+ cleanup_job = GraphJob.from_node_schema(
430
+ APIGatewayDeploymentSchema(),
431
+ common_job_parameters,
432
+ )
433
+ cleanup_job.run(neo4j_session)
434
+
348
435
 
349
436
  @timeit
350
437
  def sync_apigateway_rest_apis(
@@ -375,6 +462,19 @@ def sync_apigateway_rest_apis(
375
462
  current_aws_account_id,
376
463
  aws_update_tag,
377
464
  )
465
+
466
+ api_ids = get_rest_api_ids(rest_apis)
467
+ deployments = get_rest_api_deployments(
468
+ boto3_session,
469
+ api_ids,
470
+ region,
471
+ )
472
+
473
+ transformed_deployments = transform_apigateway_deployments(
474
+ deployments,
475
+ region,
476
+ )
477
+
378
478
  load_apigateway_rest_apis(
379
479
  neo4j_session,
380
480
  transformed_apis,
@@ -388,6 +488,13 @@ def sync_apigateway_rest_apis(
388
488
  current_aws_account_id,
389
489
  aws_update_tag,
390
490
  )
491
+ load_apigateway_deployments(
492
+ neo4j_session,
493
+ transformed_deployments,
494
+ region,
495
+ current_aws_account_id,
496
+ aws_update_tag,
497
+ )
391
498
 
392
499
 
393
500
  @timeit
@@ -0,0 +1,201 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.aws.ec2.util import get_botocore_config
12
+ from cartography.models.aws.cognito.identity_pool import CognitoIdentityPoolSchema
13
+ from cartography.models.aws.cognito.user_pool import CognitoUserPoolSchema
14
+ from cartography.util import aws_handle_regions
15
+ from cartography.util import timeit
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ @timeit
21
+ @aws_handle_regions
22
+ def get_identity_pools(
23
+ boto3_session: boto3.Session, region: str
24
+ ) -> List[Dict[str, Any]]:
25
+ client = boto3_session.client(
26
+ "cognito-identity", region_name=region, config=get_botocore_config()
27
+ )
28
+ paginator = client.get_paginator("list_identity_pools")
29
+
30
+ all_identity_pools = []
31
+
32
+ for page in paginator.paginate(MaxResults=50):
33
+ identity_pools = page.get("IdentityPools", [])
34
+ all_identity_pools.extend(identity_pools)
35
+ return all_identity_pools
36
+
37
+
38
+ @timeit
39
+ @aws_handle_regions
40
+ def get_identity_pool_roles(
41
+ boto3_session: boto3.Session, identity_pools: List[Dict[str, Any]], region: str
42
+ ) -> List[Dict[str, Any]]:
43
+ client = boto3_session.client(
44
+ "cognito-identity", region_name=region, config=get_botocore_config()
45
+ )
46
+ all_identity_pool_details = []
47
+ for identity_pool in identity_pools:
48
+ response = client.get_identity_pool_roles(
49
+ IdentityPoolId=identity_pool["IdentityPoolId"]
50
+ )
51
+ all_identity_pool_details.append(response)
52
+ return all_identity_pool_details
53
+
54
+
55
+ @timeit
56
+ @aws_handle_regions
57
+ def get_user_pools(boto3_session: boto3.Session, region: str) -> List[Dict[str, Any]]:
58
+ client = boto3_session.client(
59
+ "cognito-idp", region_name=region, config=get_botocore_config()
60
+ )
61
+ paginator = client.get_paginator("list_user_pools")
62
+ all_user_pools = []
63
+
64
+ for page in paginator.paginate(MaxResults=50):
65
+ user_pools = page.get("UserPools", [])
66
+ all_user_pools.extend(user_pools)
67
+ return all_user_pools
68
+
69
+
70
+ def transform_identity_pools(
71
+ identity_pools: List[Dict[str, Any]], region: str
72
+ ) -> List[Dict[str, Any]]:
73
+ transformed_identity_pools = []
74
+ for pool in identity_pools:
75
+ transformed_pool = {
76
+ "IdentityPoolId": pool["IdentityPoolId"],
77
+ "Region": region,
78
+ "Roles": list(pool.get("Roles", {}).values()),
79
+ }
80
+ transformed_identity_pools.append(transformed_pool)
81
+ return transformed_identity_pools
82
+
83
+
84
+ def transform_user_pools(
85
+ user_pools: List[Dict[str, Any]], region: str
86
+ ) -> List[Dict[str, Any]]:
87
+ transformed_user_pools = []
88
+ for pool in user_pools:
89
+ transformed_pool = {
90
+ "Id": pool["Id"],
91
+ "Region": region,
92
+ "Name": pool["Name"],
93
+ "Status": pool.get("Status"),
94
+ }
95
+ transformed_user_pools.append(transformed_pool)
96
+ return transformed_user_pools
97
+
98
+
99
+ @timeit
100
+ def load_identity_pools(
101
+ neo4j_session: neo4j.Session,
102
+ data: List[Dict[str, Any]],
103
+ region: str,
104
+ current_aws_account_id: str,
105
+ aws_update_tag: int,
106
+ ) -> None:
107
+ logger.info(
108
+ f"Loading Cognito Identity Pools {len(data)} for region '{region}' into graph.",
109
+ )
110
+ load(
111
+ neo4j_session,
112
+ CognitoIdentityPoolSchema(),
113
+ data,
114
+ lastupdated=aws_update_tag,
115
+ Region=region,
116
+ AWS_ID=current_aws_account_id,
117
+ )
118
+
119
+
120
+ @timeit
121
+ def load_user_pools(
122
+ neo4j_session: neo4j.Session,
123
+ data: List[Dict[str, Any]],
124
+ region: str,
125
+ current_aws_account_id: str,
126
+ aws_update_tag: int,
127
+ ) -> None:
128
+ logger.info(
129
+ f"Loading Cognito User Pools {len(data)} for region '{region}' into graph.",
130
+ )
131
+ load(
132
+ neo4j_session,
133
+ CognitoUserPoolSchema(),
134
+ data,
135
+ lastupdated=aws_update_tag,
136
+ Region=region,
137
+ AWS_ID=current_aws_account_id,
138
+ )
139
+
140
+
141
+ @timeit
142
+ def cleanup(
143
+ neo4j_session: neo4j.Session,
144
+ common_job_parameters: Dict[str, Any],
145
+ ) -> None:
146
+ logger.debug("Running Efs cleanup job.")
147
+ GraphJob.from_node_schema(CognitoIdentityPoolSchema(), common_job_parameters).run(
148
+ neo4j_session
149
+ )
150
+ GraphJob.from_node_schema(CognitoUserPoolSchema(), common_job_parameters).run(
151
+ neo4j_session
152
+ )
153
+
154
+
155
+ @timeit
156
+ def sync(
157
+ neo4j_session: neo4j.Session,
158
+ boto3_session: boto3.session.Session,
159
+ regions: List[str],
160
+ current_aws_account_id: str,
161
+ update_tag: int,
162
+ common_job_parameters: Dict[str, Any],
163
+ ) -> None:
164
+ for region in regions:
165
+ logger.info(
166
+ f"Syncing Cognito Identity Pools for region '{region}' in account '{current_aws_account_id}'.",
167
+ )
168
+
169
+ identity_pools = get_identity_pools(boto3_session, region)
170
+ if not identity_pools:
171
+ logger.info(
172
+ f"No Cognito Identity Pools found in region '{region}'. Skipping sync."
173
+ )
174
+ else:
175
+ identity_pool_roles = get_identity_pool_roles(
176
+ boto3_session, identity_pools, region
177
+ )
178
+ transformed_identity_pools = transform_identity_pools(
179
+ identity_pool_roles, region
180
+ )
181
+
182
+ load_identity_pools(
183
+ neo4j_session,
184
+ transformed_identity_pools,
185
+ region,
186
+ current_aws_account_id,
187
+ update_tag,
188
+ )
189
+
190
+ user_pools = get_user_pools(boto3_session, region)
191
+ transformed_user_pools = transform_user_pools(user_pools, region)
192
+
193
+ load_user_pools(
194
+ neo4j_session,
195
+ transformed_user_pools,
196
+ region,
197
+ current_aws_account_id,
198
+ update_tag,
199
+ )
200
+
201
+ cleanup(neo4j_session, common_job_parameters)