cartography 0.105.0__py3-none-any.whl → 0.106.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (108) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +78 -2
  3. cartography/client/core/tx.py +62 -0
  4. cartography/config.py +24 -0
  5. cartography/data/indexes.cypher +0 -34
  6. cartography/driftdetect/cli.py +3 -2
  7. cartography/graph/cleanupbuilder.py +47 -0
  8. cartography/graph/job.py +42 -0
  9. cartography/graph/querybuilder.py +136 -2
  10. cartography/graph/statement.py +1 -1
  11. cartography/intel/airbyte/__init__.py +105 -0
  12. cartography/intel/airbyte/connections.py +120 -0
  13. cartography/intel/airbyte/destinations.py +81 -0
  14. cartography/intel/airbyte/organizations.py +59 -0
  15. cartography/intel/airbyte/sources.py +78 -0
  16. cartography/intel/airbyte/tags.py +64 -0
  17. cartography/intel/airbyte/users.py +106 -0
  18. cartography/intel/airbyte/util.py +122 -0
  19. cartography/intel/airbyte/workspaces.py +63 -0
  20. cartography/intel/aws/codebuild.py +132 -0
  21. cartography/intel/aws/ecs.py +228 -380
  22. cartography/intel/aws/efs.py +261 -0
  23. cartography/intel/aws/identitycenter.py +14 -3
  24. cartography/intel/aws/inspector.py +96 -53
  25. cartography/intel/aws/rds.py +2 -1
  26. cartography/intel/aws/resources.py +4 -0
  27. cartography/intel/entra/__init__.py +11 -0
  28. cartography/intel/entra/applications.py +366 -0
  29. cartography/intel/entra/users.py +84 -42
  30. cartography/intel/kubernetes/__init__.py +30 -14
  31. cartography/intel/kubernetes/clusters.py +86 -0
  32. cartography/intel/kubernetes/namespaces.py +59 -57
  33. cartography/intel/kubernetes/pods.py +140 -77
  34. cartography/intel/kubernetes/secrets.py +95 -45
  35. cartography/intel/kubernetes/services.py +131 -67
  36. cartography/intel/kubernetes/util.py +125 -14
  37. cartography/intel/scaleway/__init__.py +127 -0
  38. cartography/intel/scaleway/iam/__init__.py +0 -0
  39. cartography/intel/scaleway/iam/apikeys.py +71 -0
  40. cartography/intel/scaleway/iam/applications.py +71 -0
  41. cartography/intel/scaleway/iam/groups.py +71 -0
  42. cartography/intel/scaleway/iam/users.py +71 -0
  43. cartography/intel/scaleway/instances/__init__.py +0 -0
  44. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  45. cartography/intel/scaleway/instances/instances.py +92 -0
  46. cartography/intel/scaleway/projects.py +79 -0
  47. cartography/intel/scaleway/storage/__init__.py +0 -0
  48. cartography/intel/scaleway/storage/snapshots.py +86 -0
  49. cartography/intel/scaleway/storage/volumes.py +84 -0
  50. cartography/intel/scaleway/utils.py +37 -0
  51. cartography/models/airbyte/__init__.py +0 -0
  52. cartography/models/airbyte/connection.py +138 -0
  53. cartography/models/airbyte/destination.py +75 -0
  54. cartography/models/airbyte/organization.py +19 -0
  55. cartography/models/airbyte/source.py +75 -0
  56. cartography/models/airbyte/stream.py +74 -0
  57. cartography/models/airbyte/tag.py +69 -0
  58. cartography/models/airbyte/user.py +111 -0
  59. cartography/models/airbyte/workspace.py +46 -0
  60. cartography/models/aws/codebuild/__init__.py +0 -0
  61. cartography/models/aws/codebuild/project.py +49 -0
  62. cartography/models/aws/ecs/__init__.py +0 -0
  63. cartography/models/aws/ecs/clusters.py +64 -0
  64. cartography/models/aws/ecs/container_definitions.py +93 -0
  65. cartography/models/aws/ecs/container_instances.py +84 -0
  66. cartography/models/aws/ecs/containers.py +99 -0
  67. cartography/models/aws/ecs/services.py +117 -0
  68. cartography/models/aws/ecs/task_definitions.py +135 -0
  69. cartography/models/aws/ecs/tasks.py +110 -0
  70. cartography/models/aws/efs/__init__.py +0 -0
  71. cartography/models/aws/efs/access_point.py +77 -0
  72. cartography/models/aws/efs/file_system.py +60 -0
  73. cartography/models/aws/efs/mount_target.py +79 -0
  74. cartography/models/core/common.py +1 -0
  75. cartography/models/core/relationships.py +44 -0
  76. cartography/models/entra/app_role_assignment.py +115 -0
  77. cartography/models/entra/application.py +47 -0
  78. cartography/models/entra/user.py +17 -51
  79. cartography/models/kubernetes/__init__.py +0 -0
  80. cartography/models/kubernetes/clusters.py +26 -0
  81. cartography/models/kubernetes/containers.py +108 -0
  82. cartography/models/kubernetes/namespaces.py +51 -0
  83. cartography/models/kubernetes/pods.py +80 -0
  84. cartography/models/kubernetes/secrets.py +79 -0
  85. cartography/models/kubernetes/services.py +108 -0
  86. cartography/models/scaleway/__init__.py +0 -0
  87. cartography/models/scaleway/iam/__init__.py +0 -0
  88. cartography/models/scaleway/iam/apikey.py +96 -0
  89. cartography/models/scaleway/iam/application.py +52 -0
  90. cartography/models/scaleway/iam/group.py +95 -0
  91. cartography/models/scaleway/iam/user.py +60 -0
  92. cartography/models/scaleway/instance/__init__.py +0 -0
  93. cartography/models/scaleway/instance/flexibleip.py +52 -0
  94. cartography/models/scaleway/instance/instance.py +118 -0
  95. cartography/models/scaleway/organization.py +19 -0
  96. cartography/models/scaleway/project.py +48 -0
  97. cartography/models/scaleway/storage/__init__.py +0 -0
  98. cartography/models/scaleway/storage/snapshot.py +78 -0
  99. cartography/models/scaleway/storage/volume.py +51 -0
  100. cartography/sync.py +8 -4
  101. cartography/util.py +15 -10
  102. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/METADATA +5 -2
  103. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/RECORD +107 -35
  104. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  105. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/WHEEL +0 -0
  106. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/entry_points.txt +0 -0
  107. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/licenses/LICENSE +0 -0
  108. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,122 @@
1
+ import logging
2
+ import time
3
+ from typing import Any
4
+
5
+ import requests
6
+
7
+ logger = logging.getLogger(__name__)
8
+ # Connect and read timeouts of 60 seconds each; see https://requests.readthedocs.io/en/master/user/advanced/#timeouts
9
+ _TIMEOUT = (60, 60)
10
+
11
+
12
+ class AirbyteClient:
13
+ """A client for interacting with the Airbyte API.
14
+ This client handles authentication and provides methods to make GET requests to the Airbyte API.
15
+ It automatically handles pagination for GET requests that return multiple pages of data.
16
+ """
17
+
18
+ def __init__(self, base_url: str, client_id: str, client_secret: str) -> None:
19
+ self._client_id = client_id
20
+ self._client_secret = client_secret
21
+ self.base_url = base_url
22
+ self._access_token_expiry: int | None = None
23
+ self._session = requests.Session()
24
+
25
+ def get(self, uri: str, params: dict | None = None, offset: int = 0) -> list[dict]:
26
+ """Make a GET request to the Airbyte API.
27
+ This method handles authentication and pagination.
28
+ Args:
29
+ uri (str): The URI to make the GET request to.
30
+ params (dict | None): Optional parameters to include in the request.
31
+ offset (int): The offset for pagination, defaults to 0.
32
+ Returns:
33
+ list[dict]: A list of dictionaries containing the data from the response.
34
+ """
35
+ self.authenticate()
36
+ if params is None:
37
+ params_with_pagination = {}
38
+ else:
39
+ params_with_pagination = params.copy()
40
+ params_with_pagination["offset"] = offset
41
+ response = self._session.get(
42
+ f"{self.base_url}{uri}", params=params_with_pagination, timeout=_TIMEOUT
43
+ )
44
+ response.raise_for_status()
45
+ data = response.json().get("data")
46
+ if response.json().get("next", "") != "":
47
+ data.extend(
48
+ self.get(
49
+ uri,
50
+ params=params,
51
+ offset=offset + len(data),
52
+ )
53
+ )
54
+ return data
55
+
56
+ def authenticate(self) -> None:
57
+ """Authenticate with the Airbyte API using client credentials.
58
+ This method checks if the access token is still valid and renews it if necessary.
59
+ If the access token is expired or not set, it will make a request to obtain a new access token.
60
+ """
61
+ if self._access_token_expiry and self._access_token_expiry >= time.time():
62
+ return
63
+ self._session.headers.pop("Authorization", None)
64
+ payload = {
65
+ "grant-type": "client_credentials",
66
+ "client_id": self._client_id,
67
+ "client_secret": self._client_secret,
68
+ }
69
+ response = self._session.post(
70
+ f"{self.base_url}/applications/token", json=payload, timeout=_TIMEOUT
71
+ )
72
+ response.raise_for_status()
73
+ data = response.json()
74
+ self._session.headers["Authorization"] = (
75
+ f"Bearer {data.get('access_token', '')}"
76
+ )
77
+ token_expiry = data.get("expires_in", 0)
78
+ self._access_token_expiry = time.time() + token_expiry
79
+ logger.debug("Access token renewed, expires in %s seconds.", token_expiry)
80
+
81
+
82
def normalize_airbyte_config(config: dict[str, Any]) -> dict[str, Any]:
    """Normalize the Airbyte configuration dictionary.

    Connector-specific keys are mapped onto a small set of standard keys
    (``host``, ``port``, ``name``, ``region``, ``endpoint``, ``account``) so
    that different connector configurations can be handled uniformly. Keys
    that are neither standard nor a known alias are dropped.

    Args:
        config (dict[str, Any]): The configuration dictionary to normalize.

    Returns:
        dict[str, Any]: A new dictionary containing only standard keys. When
        both a standard key and one of its aliases are present, the standard
        key wins regardless of the input's key order.
    """
    canonical_keys = ("host", "port", "name", "region", "endpoint", "account")
    aliases = {
        "aws_region_name": "region",
        "region_name": "region",
        "s3_bucket_region": "region",
        "queue_url": "endpoint",
        "url": "endpoint",
        "s3_endpoint": "endpoint",
        "azure_blob_storage_account_name": "account",
        "storage_account_name": "account",
        "azure_blob_storage_container_name": "name",
        "bucket": "name",
        "database": "name",
        "s3_bucket_name": "name",
    }
    normalized_config: dict[str, Any] = {}
    # Aliases first (later aliases override earlier ones, following the
    # iteration order of ``config``) ...
    for key, value in config.items():
        target = aliases.get(key)
        if target is not None:
            normalized_config[target] = value
    # ... then explicit standard keys, so an alias can never shadow a value
    # that was already given under its standard name.
    for key in canonical_keys:
        if key in config:
            normalized_config[key] = config[key]
    return normalized_config
109
+
110
+
111
+ def list_to_string(lst: list[str]) -> str | None:
112
+ """Convert a list of strings to a comma-separated string."""
113
+ if len(lst) == 0:
114
+ return None
115
+ # Sublist
116
+ formated_list: list[str] = []
117
+ for item in lst:
118
+ if isinstance(item, list):
119
+ formated_list.append("|".join(item))
120
+ else:
121
+ formated_list.append(str(item))
122
+ return ",".join(formated_list)
@@ -0,0 +1,63 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import neo4j
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.graph.job import GraphJob
10
+ from cartography.intel.airbyte.util import AirbyteClient
11
+ from cartography.models.airbyte.workspace import AirbyteWorkspaceSchema
12
+ from cartography.util import timeit
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
@timeit
def sync(
    neo4j_session: neo4j.Session,
    api_session: AirbyteClient,
    org_id: str,
    common_job_parameters: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Fetch, load and clean up Airbyte workspaces for one organization.

    The workspaces returned by ``get`` are written with ``load_workspaces``
    using ``common_job_parameters["UPDATE_TAG"]``, then ``cleanup`` is run.

    Returns:
        The workspace records exactly as returned by ``get``.
    """
    fetched_workspaces = get(api_session, org_id)
    update_tag = common_job_parameters["UPDATE_TAG"]
    load_workspaces(neo4j_session, fetched_workspaces, org_id, update_tag)
    cleanup(neo4j_session, common_job_parameters)
    return fetched_workspaces
30
+
31
+
32
@timeit
def get(
    api_session: AirbyteClient,
    org_id: str,
) -> List[Dict[str, Any]]:
    """Return the ``/workspaces`` listing filtered by ``organizationId``."""
    query = {"organizationId": org_id}
    return api_session.get("/workspaces", params=query)
38
+
39
+
40
@timeit
def load_workspaces(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    org_id: str,
    update_tag: int,
) -> None:
    """Write workspace records to the graph using ``AirbyteWorkspaceSchema``.

    ``update_tag`` is passed to ``load`` as ``lastupdated`` and ``org_id`` as
    ``ORG_ID``.
    """
    workspace_count = len(data)
    logger.info("Loading %d Airbyte Workspaces into Neo4j.", workspace_count)
    schema = AirbyteWorkspaceSchema()
    load(neo4j_session, schema, data, lastupdated=update_tag, ORG_ID=org_id)
55
+
56
+
57
@timeit
def cleanup(
    neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
) -> None:
    """Build the ``GraphJob`` for ``AirbyteWorkspaceSchema`` and run it.

    NOTE(review): presumably this removes workspace nodes not touched by the
    current sync; confirm against ``GraphJob.from_node_schema``.
    """
    cleanup_job = GraphJob.from_node_schema(
        AirbyteWorkspaceSchema(), common_job_parameters
    )
    cleanup_job.run(neo4j_session)
@@ -0,0 +1,132 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import boto3
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.aws.ec2.util import get_botocore_config
12
+ from cartography.models.aws.codebuild.project import CodeBuildProjectSchema
13
+ from cartography.util import aws_handle_regions
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
@aws_handle_regions
def get_all_codebuild_projects(
    boto3_session: boto3.Session, region: str
) -> List[Dict[str, Any]]:
    """Return the full description of every CodeBuild project in ``region``.

    Project names are listed page by page with the ``list_projects``
    paginator and resolved to project details via ``batch_get_projects``.
    """
    # AWS batch_get_projects accepts up to 100 project names per call as per AWS documentation.
    max_names_per_call = 100

    codebuild = boto3_session.client(
        "codebuild", region_name=region, config=get_botocore_config()
    )
    collected: List[Dict[str, Any]] = []

    for page in codebuild.get_paginator("list_projects").paginate():
        names = page.get("projects", [])
        if names:
            for start in range(0, len(names), max_names_per_call):
                chunk = names[start : start + max_names_per_call]
                details = codebuild.batch_get_projects(names=chunk)
                collected.extend(details.get("projects", []))
    return collected
44
+
45
+
46
def transform_codebuild_projects(
    projects: List[Dict[str, Any]], region: str
) -> List[Dict[str, Any]]:
    """
    Reshape raw CodeBuild project descriptions for ingestion into Neo4j.

    Each output record carries the project's ``arn``, ``created``,
    ``sourceType``, ``sourceLocation`` and ``environmentVariables``, the
    latter as a list of ``NAME=value`` strings:

    - Every environment variable name is kept.
    - Values are kept only for variables of type 'PLAINTEXT'.
    - Values of any other type (e.g., 'PARAMETER_STORE', 'SECRETS_MANAGER')
      are replaced by '<REDACTED>'.

    ``region`` is accepted but not used by the transformation.
    """

    def _render(var: Dict[str, Any]) -> str:
        # Only PLAINTEXT values are safe to expose; everything else is masked.
        if var.get("type") == "PLAINTEXT":
            shown = var.get("value")
        else:
            shown = "<REDACTED>"
        return f"{var.get('name')}={shown}"

    result: List[Dict[str, Any]] = []
    for project in projects:
        environment = project.get("environment", {})
        rendered_vars = list(map(_render, environment.get("environmentVariables", [])))
        result.append(
            {
                "arn": project["arn"],
                "created": project.get("created"),
                "environmentVariables": rendered_vars,
                "sourceType": project.get("source", {}).get("type"),
                "sourceLocation": project.get("source", {}).get("location"),
            }
        )
    return result
73
+
74
+
75
@timeit
def load_codebuild_projects(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """Write transformed CodeBuild projects to the graph.

    Records are loaded with ``CodeBuildProjectSchema``; ``aws_update_tag`` is
    passed as ``lastupdated``, ``region`` as ``Region`` and the account id as
    ``AWS_ID``.
    """
    message = f"Loading CodeBuild {len(data)} projects for region '{region}' into graph."
    logger.info(message)
    schema = CodeBuildProjectSchema()
    load(
        neo4j_session,
        schema,
        data,
        lastupdated=aws_update_tag,
        Region=region,
        AWS_ID=current_aws_account_id,
    )
94
+
95
+
96
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
) -> None:
    """Build the ``GraphJob`` for ``CodeBuildProjectSchema`` and run it.

    NOTE(review): presumably this removes CodeBuild project nodes not touched
    by the current sync; confirm against ``GraphJob.from_node_schema``.
    """
    # The message previously said "Efs" (copied from the EFS module), which
    # made debug logs point at the wrong sync.
    logger.debug("Running CodeBuild cleanup job.")
    GraphJob.from_node_schema(CodeBuildProjectSchema(), common_job_parameters).run(
        neo4j_session
    )
105
+
106
+
107
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: List[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """Sync CodeBuild projects for every region, then run cleanup once.

    For each region the projects are fetched, transformed and loaded; the
    cleanup job runs after all regions have been processed.
    """
    for region in regions:
        logger.info(
            f"Syncing CodeBuild for region '{region}' in account '{current_aws_account_id}'.",
        )
        raw_projects = get_all_codebuild_projects(boto3_session, region)
        load_codebuild_projects(
            neo4j_session,
            transform_codebuild_projects(raw_projects, region),
            region,
            current_aws_account_id,
            update_tag,
        )

    cleanup(neo4j_session, common_job_parameters)