cloe-nessy 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/__init__.py +0 -0
- cloe_nessy/clients/__init__.py +5 -0
- cloe_nessy/clients/api_client/__init__.py +3 -0
- cloe_nessy/clients/api_client/api_client.py +188 -0
- cloe_nessy/clients/api_client/api_response.py +72 -0
- cloe_nessy/clients/api_client/auth.py +178 -0
- cloe_nessy/clients/api_client/exceptions.py +22 -0
- cloe_nessy/file_utilities/__init__.py +3 -0
- cloe_nessy/file_utilities/exceptions.py +4 -0
- cloe_nessy/file_utilities/factory.py +42 -0
- cloe_nessy/file_utilities/get_file_paths.py +72 -0
- cloe_nessy/file_utilities/location_types.py +29 -0
- cloe_nessy/file_utilities/strategies/__init__.py +0 -0
- cloe_nessy/file_utilities/strategies/base_strategy.py +59 -0
- cloe_nessy/file_utilities/strategies/local_strategy.py +51 -0
- cloe_nessy/file_utilities/strategies/onelake_strategy.py +31 -0
- cloe_nessy/file_utilities/strategies/utils_strategy.py +72 -0
- cloe_nessy/integration/__init__.py +0 -0
- cloe_nessy/integration/reader/__init__.py +6 -0
- cloe_nessy/integration/reader/api_reader.py +141 -0
- cloe_nessy/integration/reader/catalog_reader.py +49 -0
- cloe_nessy/integration/reader/excel_reader.py +170 -0
- cloe_nessy/integration/reader/exceptions.py +10 -0
- cloe_nessy/integration/reader/file_reader.py +96 -0
- cloe_nessy/integration/reader/reader.py +34 -0
- cloe_nessy/integration/writer/__init__.py +3 -0
- cloe_nessy/integration/writer/catalog_writer.py +48 -0
- cloe_nessy/logging/__init__.py +3 -0
- cloe_nessy/logging/logger_mixin.py +162 -0
- cloe_nessy/models/__init__.py +13 -0
- cloe_nessy/models/column.py +65 -0
- cloe_nessy/models/constraint.py +9 -0
- cloe_nessy/models/foreign_key.py +34 -0
- cloe_nessy/models/mixins/__init__.py +0 -0
- cloe_nessy/models/mixins/read_instance_mixin.py +124 -0
- cloe_nessy/models/mixins/template_loader_mixin.py +18 -0
- cloe_nessy/models/schema.py +76 -0
- cloe_nessy/models/table.py +236 -0
- cloe_nessy/models/types.py +7 -0
- cloe_nessy/object_manager/__init__.py +3 -0
- cloe_nessy/object_manager/table_manager.py +58 -0
- cloe_nessy/pipeline/__init__.py +7 -0
- cloe_nessy/pipeline/actions/__init__.py +50 -0
- cloe_nessy/pipeline/actions/read_api.py +178 -0
- cloe_nessy/pipeline/actions/read_catalog_table.py +68 -0
- cloe_nessy/pipeline/actions/read_excel.py +177 -0
- cloe_nessy/pipeline/actions/read_files.py +105 -0
- cloe_nessy/pipeline/actions/read_metadata_yaml.py +66 -0
- cloe_nessy/pipeline/actions/transform_change_datatype.py +56 -0
- cloe_nessy/pipeline/actions/transform_concat_columns.py +88 -0
- cloe_nessy/pipeline/actions/transform_decode.py +102 -0
- cloe_nessy/pipeline/actions/transform_distinct.py +40 -0
- cloe_nessy/pipeline/actions/transform_filter.py +51 -0
- cloe_nessy/pipeline/actions/transform_generic_sql.py +66 -0
- cloe_nessy/pipeline/actions/transform_join.py +81 -0
- cloe_nessy/pipeline/actions/transform_json_normalize.py +106 -0
- cloe_nessy/pipeline/actions/transform_rename_columns.py +60 -0
- cloe_nessy/pipeline/actions/transform_replace_values.py +59 -0
- cloe_nessy/pipeline/actions/transform_select_columns.py +83 -0
- cloe_nessy/pipeline/actions/transform_union.py +71 -0
- cloe_nessy/pipeline/actions/write_catalog_table.py +73 -0
- cloe_nessy/pipeline/pipeline.py +201 -0
- cloe_nessy/pipeline/pipeline_action.py +62 -0
- cloe_nessy/pipeline/pipeline_config.py +92 -0
- cloe_nessy/pipeline/pipeline_context.py +56 -0
- cloe_nessy/pipeline/pipeline_parsing_service.py +156 -0
- cloe_nessy/pipeline/pipeline_step.py +50 -0
- cloe_nessy/py.typed +0 -0
- cloe_nessy/session/__init__.py +3 -0
- cloe_nessy/session/session_manager.py +188 -0
- cloe_nessy/settings/__init__.py +3 -0
- cloe_nessy/settings/settings.py +91 -0
- cloe_nessy/utils/__init__.py +0 -0
- cloe_nessy/utils/file_and_directory_handler.py +19 -0
- cloe_nessy-0.2.9.dist-info/METADATA +26 -0
- cloe_nessy-0.2.9.dist-info/RECORD +78 -0
- cloe_nessy-0.2.9.dist-info/WHEEL +5 -0
- cloe_nessy-0.2.9.dist-info/top_level.txt +1 -0
cloe_nessy/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
from http import HTTPStatus
|
|
2
|
+
from time import sleep
|
|
3
|
+
from typing import Any
|
|
4
|
+
from urllib.parse import urljoin
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
from requests.auth import AuthBase
|
|
8
|
+
|
|
9
|
+
from .api_response import APIResponse
|
|
10
|
+
from .exceptions import APIClientConnectionError, APIClientError, APIClientHTTPError, APIClientTimeoutError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class APIClient:
    """A standardized client for the interaction with APIs.

    This class handles the communication with an API. Requests that fail with
    a connection error or that are answered with one of the ``RETRY_CODES``
    are retried with an exponential backoff, up to ``max_retries`` times.

    Attributes:
        RETRY_CODES: List of HTTP status codes that should trigger a retry.
        MAX_SLEEP_TIME: Maximum time to wait between retries, in seconds.
        base_url: The base URL for the API. Always stored with a trailing slash.
        session: The session object for making requests.
    """

    RETRY_CODES: list[int] = [
        HTTPStatus.TOO_MANY_REQUESTS,
        HTTPStatus.SERVICE_UNAVAILABLE,
        HTTPStatus.GATEWAY_TIMEOUT,
    ]

    MAX_SLEEP_TIME: int = 180  # seconds

    def __init__(
        self,
        base_url: str,
        auth: AuthBase | None = None,
        default_headers: dict[str, str] | None = None,
    ):
        """Initializes the APIClient object.

        Args:
            base_url: The base URL for the API.
            auth: The authentication method for the API.
            default_headers: Default headers to include in requests.
        """
        # urljoin() replaces the last path segment of a base without a trailing
        # slash, so the slash is enforced here.
        if not base_url.endswith("/"):
            base_url += "/"
        self.base_url = base_url
        self.session = requests.Session()
        if default_headers:
            self.session.headers.update(default_headers)
        self.session.auth = auth

    def _make_request(
        self,
        method: str,
        endpoint: str,
        timeout: int = 30,
        params: dict[str, str] | None = None,
        data: dict[str, str] | None = None,
        json: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
        max_retries: int = 0,
    ) -> APIResponse:
        """Makes a request to the API endpoint.

        Args:
            method: The HTTP method to use for the request.
            endpoint: The endpoint to send the request to. Leading and trailing
                slashes are stripped before it is joined with the base URL.
            timeout: The timeout for the request in seconds.
            params: The query parameters for the request.
            data: The form data to include in the request.
            json: The JSON data to include in the request.
            headers: The headers to include in the request.
            max_retries: The maximum number of retries for the request.

        Returns:
            APIResponse: The response from the API.

        Raises:
            APIClientHTTPError: If the API answers with an error status that is
                not one of the retry codes.
            APIClientConnectionError: If the connection still fails on the last attempt.
            APIClientTimeoutError: If the request times out.
            APIClientError: If any other request error occurs or all attempts
                were answered with a retry code.
        """
        url = urljoin(self.base_url, endpoint.strip("/"))

        for attempt in range(max_retries + 1):
            retries_left = attempt < max_retries
            try:
                # Optional arguments are forwarded unchanged: replacing a missing
                # `json` with `{}` would make requests send a "{}" body (and a JSON
                # Content-Type) on every call, including GET and DELETE.
                response = self.session.request(
                    method=method,
                    url=url,
                    timeout=timeout,
                    params=params,
                    data=data,
                    json=json,
                    headers=headers,
                )
                if response.status_code not in APIClient.RETRY_CODES:
                    response.raise_for_status()
                    return APIResponse(response)
            except requests.exceptions.HTTPError as err:
                raise APIClientHTTPError(f"HTTP error occurred: {err}") from err
            except requests.exceptions.ConnectionError as err:
                if not retries_left:
                    raise APIClientConnectionError(f"Connection error occurred: {err}") from err
            except requests.exceptions.Timeout as err:
                raise APIClientTimeoutError(f"Timeout error occurred: {err}") from err
            except requests.exceptions.RequestException as err:
                raise APIClientError(f"An error occurred: {err}") from err

            # Reached after a retryable status code or a retryable connection
            # error: back off exponentially (capped) before the next attempt.
            if retries_left:
                sleep(min(2**attempt, APIClient.MAX_SLEEP_TIME))

        raise APIClientError(f"The maximum configured retries of [ '{max_retries}' ] have been exceeded")

    def get(self, endpoint: str, **kwargs: Any) -> APIResponse:
        """Sends a GET request to the specified endpoint.

        Args:
            endpoint: The endpoint to send the request to.
            **kwargs: Additional arguments to pass to the request.

        Returns:
            APIResponse: The response from the API.
        """
        return self._make_request(method="GET", endpoint=endpoint, **kwargs)

    def post(self, endpoint: str, **kwargs: Any) -> APIResponse:
        """Sends a POST request to the specified endpoint.

        Args:
            endpoint: The endpoint to send the request to.
            **kwargs: Additional arguments to pass to the request.

        Returns:
            APIResponse: The response from the API.
        """
        return self._make_request(method="POST", endpoint=endpoint, **kwargs)

    def put(self, endpoint: str, **kwargs: Any) -> APIResponse:
        """Sends a PUT request to the specified endpoint.

        Args:
            endpoint: The endpoint to send the request to.
            **kwargs: Additional arguments to pass to the request.

        Returns:
            APIResponse: The response from the API.
        """
        return self._make_request(method="PUT", endpoint=endpoint, **kwargs)

    def delete(self, endpoint: str, **kwargs: Any) -> APIResponse:
        """Sends a DELETE request to the specified endpoint.

        Args:
            endpoint: The endpoint to send the request to.
            **kwargs: Additional arguments to pass to the request.

        Returns:
            APIResponse: The response from the API.
        """
        return self._make_request(method="DELETE", endpoint=endpoint, **kwargs)

    def patch(self, endpoint: str, **kwargs: Any) -> APIResponse:
        """Sends a PATCH request to the specified endpoint.

        Args:
            endpoint: The endpoint to send the request to.
            **kwargs: Additional arguments to pass to the request.

        Returns:
            APIResponse: The response from the API.
        """
        return self._make_request(method="PATCH", endpoint=endpoint, **kwargs)

    def request(self, method: str, endpoint: str, **kwargs: Any) -> APIResponse:
        """Sends a request to the specified endpoint with the specified method.

        Args:
            method: The HTTP method to use for the request.
            endpoint: The endpoint to send the request to.
            **kwargs: Additional arguments to pass to the request.

        Returns:
            APIResponse: The response from the API.
        """
        return self._make_request(method=method, endpoint=endpoint, **kwargs)
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
from .exceptions import APIClientError
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class APIResponse:
    """Wrapper around a ``requests.Response`` that adds parsing helpers.

    Attributes:
        response: The original response object.
        headers: The headers of the response.
        status_code: The status code of the response.
        url: The URL of the response.
        reason: The reason phrase of the response.
        elapsed: The elapsed time reported by the response.
        content_type: The lower-cased ``Content-Type`` header, or an empty
            string when the header is absent.
    """

    def __init__(self, response: requests.Response):
        """Stores the response and copies its most used fields onto the wrapper.

        Args:
            response: The response object from an API request.
        """
        self.response = response
        self.headers = response.headers
        self.status_code = response.status_code
        self.url = response.url
        self.reason = response.reason
        self.elapsed = response.elapsed
        self.content_type = self.headers.get("Content-Type", "").lower()

    def to_dict(self, key: str | None = None) -> dict[str, Any]:
        """Parses the response body into a dictionary.

        JSON responses are decoded; any other content type is wrapped as
        ``{"value": <response text>}``.

        Args:
            key: Optional key to look up in the parsed body. A falsy key
                (``None`` or an empty string) returns the whole body.

        Returns:
            The parsed body, or the value stored under ``key`` when a key is
            given (which is not necessarily a dictionary).

        Raises:
            KeyError: If the specified key is not found in the response.
            ValueError: If there is an error parsing the JSON response.
            APIClientError: For any other unexpected error.
        """
        parsed = {}
        try:
            is_json = "application/json" in self.content_type
            # Handling of other response types can be added to this expression.
            parsed = self.response.json() if is_json else {"value": self.response.text}
            if key:
                parsed = parsed[key]
        except KeyError as err:
            raise KeyError(
                f"The key '{err.args[0]}' was not found in the response. Status code: {self.status_code}, Headers: {self.headers}, Response: {parsed}"
            ) from err
        except ValueError as err:
            raise ValueError(
                f"Error parsing JSON response: {err}. Status code: {self.status_code}, Headers: {self.headers}, Response content: {self.response.text}"
            ) from err
        except Exception as err:
            raise APIClientError(
                f"An unexpected error occurred: {err}. Status code: {self.status_code}, Headers: {self.headers}, Response content: {self.response.text}"
            ) from err
        return parsed
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from azure.core.credentials import TokenCredential
|
|
6
|
+
from azure.identity import ClientSecretCredential
|
|
7
|
+
from requests import PreparedRequest
|
|
8
|
+
from requests.auth import AuthBase
|
|
9
|
+
|
|
10
|
+
from ...session import SessionManager
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AzureCredentialAuth(AuthBase):
    """Requests auth hook that adds a bearer token obtained from an Azure credential."""

    def __init__(
        self,
        scope: str,
        credential: TokenCredential | ClientSecretCredential | None = None,
        client_id: str | None = None,
        client_secret: str | None = None,
        tenant_id: str | None = None,
    ):
        """Initializes the AzureCredentialAuth with an Azure credential.

        Either pass a ready-made credential object, or pass client_id,
        client_secret and tenant_id, from which a ClientSecretCredential is built.

        Args:
            scope: The scope for the token. E.g., the client ID of the Azure AD application.
            credential: The Azure credential object.
            client_id: The client ID for the Azure AD application.
            client_secret: The client secret for the Azure AD application.
            tenant_id: The tenant ID for the Azure AD application.

        Raises:
            ValueError: If no credential is given and any of client_id,
                client_secret or tenant_id is missing.
        """
        self.scope = scope
        self._token = None  # cached token object, fetched on first use
        if credential is not None:
            self.credential = credential
            return

        if client_id is None or client_secret is None or tenant_id is None:
            raise ValueError("Either a credential or client_id, client_secret, and tenant_id must be provided.")
        self.credential = ClientSecretCredential(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        )

    @property
    def token(self):
        """Return the token string, requesting a new token when none is cached or it is about to expire."""
        # A token is treated as expired 5 seconds before its reported expiry.
        refresh_deadline = int(time.time()) + 5
        if self._token is None or self._token.expires_on < refresh_deadline:
            self._token = self.credential.get_token(self.scope)
        return self._token.token

    def __call__(self, r: PreparedRequest) -> PreparedRequest:
        """Sets the Authorization header of the request to a bearer token.

        Args:
            r (PreparedRequest): The request that needs to be sent.

        Returns:
            PreparedRequest: The same request object with the Authorization header set.
        """
        bearer = f"Bearer {self.token}"
        r.headers["Authorization"] = bearer
        return r
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class SecretScopeAuth(AuthBase):
    """Requests auth hook that fills headers with secrets read from a secret scope."""

    def __init__(self, header_template: dict[str, str], secret_scope: str):
        """Initializes the SecretScopeAuth with a header template and a secret scope.

        Args:
            header_template: Mapping of header name to the name of the secret
                whose value is sent in that header.
            secret_scope: The secret scope from where the secrets will be retrieved.

        Example:
            ```python
            header_template = {
                "jfrog-user-key": "jfrog-user",
                "jfrog-password-key": "jfrog-secret",
            }
            auth = SecretScopeAuth(header_template, "my_secret_scope")
            # given, that 'jfrog-user' and 'jfrog-secret' are secrets in 'my_secret_scope'
            ```
        """
        self.secret_scope = secret_scope
        self.header_template = header_template

    def __call__(self, r: PreparedRequest) -> PreparedRequest:
        """Adds one header per template entry, using the secret value as header value.

        Args:
            r: The request that needs to be sent.

        Returns:
            PreparedRequest: The same request object, updated with the headers
                resolved from the secret scope.
        """
        utils = SessionManager.get_utils()
        resolved_headers = {}
        for header_name, secret_name in self.header_template.items():
            resolved_headers[header_name] = utils.secrets.get(self.secret_scope, secret_name)
        r.headers.update(resolved_headers)
        return r
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class ChainedAuth(AuthBase):
    """Requests auth hook that applies several auth hooks one after another."""

    def __init__(self, *args: Any):
        """Initializes the ChainedAuth.

        Args:
            *args: One or more Auth objects; they are applied to each request
                in the order given.

        Example:
            ```python
            auth_1 = SecretScopeAuth({"secret": "key"}, "my_secret_scope")
            auth_2 = SecretScopeAuth({"secret": "key"}, "my_other_secret_scope")
            chained_auth = ChainedAuth(auth_1, auth_2)
            ```
        """
        self.auths = [*args]

    def __call__(self, r: PreparedRequest) -> PreparedRequest:
        """Passes the request through every chained auth in order.

        Args:
            r: The request that needs to be sent.

        Returns:
            PreparedRequest: The request as returned by the last auth in the
                chain (the input request when the chain is empty).
        """
        request = r
        for apply_auth in self.auths:
            request = apply_auth(request)
        return request
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class EnvVariableAuth(AuthBase):
    """Requests auth hook that fills headers with values of environment variables."""

    def __init__(self, header_template: dict[str, str]):
        """Initializes the EnvVariableAuth with a header template.

        Args:
            header_template: Mapping of header name to the name of the
                environment variable whose value is sent in that header.

        Example:
            ```python
            header_template = {
                "user": "USER_NAME",
                "password": "USER_SECRET",
            }
            auth = EnvVariableAuth(header_template)
            # given, that "USER_NAME" and "USER_SECRET" are environment variables
            ```
        """
        self.header_template = header_template

    def __call__(self, r: PreparedRequest) -> PreparedRequest:
        """Adds one header per template entry, using the environment variable's value.

        A variable that is not set yields an empty string as header value.

        Args:
            r: The request that needs to be sent.

        Returns:
            PreparedRequest: The same request object, updated with the headers
                resolved from environment variables.
        """
        resolved_headers = {}
        for header_name, variable_name in self.header_template.items():
            resolved_headers[header_name] = os.environ.get(variable_name, "")
        r.headers.update(resolved_headers)
        return r
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
class APIClientError(Exception):
    """Base class for all exceptions raised by the API client."""
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class APIClientHTTPError(APIClientError):
    """Raised when the API answers with an HTTP error status."""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class APIClientConnectionError(APIClientError):
    """Raised when the connection to the API fails."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class APIClientTimeoutError(APIClientError):
    """Raised when a request to the API times out."""
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from .location_types import LocationType
|
|
2
|
+
from .strategies.local_strategy import LocalDirectoryStrategy
|
|
3
|
+
from .strategies.onelake_strategy import OneLakeStrategy
|
|
4
|
+
from .strategies.utils_strategy import UtilsStrategy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class FileRetrievalFactory:
    """Factory that maps a location type to its file retrieval strategy.

    The mapping is kept in ``_strategy_map``; ``get_strategy`` looks the
    location type up there and returns a new instance of the mapped class.
    """

    _strategy_map = {
        LocationType.LOCAL: LocalDirectoryStrategy,
        LocationType.VOLUME: LocalDirectoryStrategy,
        LocationType.ABFS: UtilsStrategy,
        LocationType.S3: UtilsStrategy,
        LocationType.ONELAKE: OneLakeStrategy,
    }

    @staticmethod
    def get_strategy(location_type: LocationType) -> LocalDirectoryStrategy | OneLakeStrategy | UtilsStrategy:
        """Returns a new strategy instance for the given location type.

        Local and volume locations use `LocalDirectoryStrategy`, ABFS and S3
        locations use `UtilsStrategy`, and OneLake locations use `OneLakeStrategy`.

        Args:
            location_type: The location type for which to get the retrieval strategy.

        Returns:
            A new instance of the strategy class mapped to the location type.

        Raises:
            ValueError: If the provided location type is unknown or unsupported.
        """
        strategy_class = FileRetrievalFactory._strategy_map.get(location_type)
        if strategy_class is None:
            raise ValueError(f"Unknown location type: {location_type}")
        instance = strategy_class()  # type: ignore
        return instance
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from ..logging.logger_mixin import LoggerMixin
|
|
4
|
+
from .factory import FileRetrievalFactory
|
|
5
|
+
from .location_types import LocationType
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_file_paths(location: str, file_name_pattern: str | None = None, search_subdirs: bool = True) -> list[str]:
    """Retrieves file paths from a specified location based on the provided criteria.

    The function determines the location type of ``location``, obtains the
    matching retrieval strategy from ``FileRetrievalFactory`` and delegates
    the actual listing to that strategy.

    Args:
        location: The location to search for files. This could be a path to a local directory or a URI for blob storage.
        file_name_pattern: The file name pattern to filter by. None retrieves all files regardless of their name.
        search_subdirs: Whether to include files from subdirectories in the search.

    Returns:
        A list of file paths that match the specified criteria. The paths are returned as strings.

    Raises:
        ValueError: If the `location` argument is empty or None, or if no
            strategy is registered for the detected location type.
        NotImplementedError: If the location type cannot be determined.

    Note:
        Errors raised by the selected strategy while listing files propagate
        unchanged. Their types are not visible here (the package declares a
        FileUtilitiesError; confirm against the strategy implementations).
    """
    logger = LoggerMixin().get_console_logger()
    if not location:
        raise ValueError("location is required")

    # Values are passed as %-style arguments. A message without a placeholder
    # plus an argument fails at format time in the standard logging module and
    # the record is lost. (Assumes get_console_logger() returns a standard
    # logging.Logger; confirm against LoggerMixin.)
    logger.debug("location: %s", location)
    logger.debug("Getting location type")
    location_type = get_location_type(location=location)
    logger.debug("location_type: %s", location_type)
    strategy = FileRetrievalFactory.get_strategy(location_type)
    logger.debug("strategy: %s", strategy)
    logger.info(
        f"Retrieving file paths from location [ '{location}' ] with strategy [ '{strategy.__class__.__name__}' ]"
    )
    paths = strategy.get_file_paths(location, file_name_pattern, search_subdirs)
    logger.debug("paths: %s", paths)
    return paths
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_location_type(location: str) -> LocationType:
    """Get the location type based on the given location string.

    Known prefixes are checked first, in the order they are listed below. A
    location without a known prefix is treated as local when it is an
    existing directory.

    Args:
        location: The location string to check.

    Returns:
        LocationType: The determined location type.

    Raises:
        NotImplementedError: If the location matches no known prefix and is
            not an existing directory.
    """
    location_mapping = {
        "abfss://": LocationType.ABFS,
        "/Volumes/": LocationType.VOLUME,
        "s3://": LocationType.S3,
        "/lakehouse/default/": LocationType.ONELAKE,
    }

    for prefix, loc_type in location_mapping.items():
        if location.startswith(prefix):
            return loc_type

    if os.path.isdir(location):
        return LocationType.LOCAL

    # The literals are concatenated implicitly, so each sentence needs its own
    # trailing space to keep the message readable.
    raise NotImplementedError(
        f"Could not determine Location type of location [ '{location}' ]. "
        "Ensure that the provided path is valid. "
        f"Available Location type implementations are: [ {', '.join(LocationType.list())} ]."
    )
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class LocationType(Enum):
    """Enum representing different types of locations.

    Attributes:
        LOCAL: Represents a local location.
        VOLUME: Represents a volume location.
        ABFS: Represents an Azure Blob File System (ABFS) location.
        S3: Represents an S3 location.
        ONELAKE: Represents a OneLake location.
    """

    LOCAL = "local"
    VOLUME = "volumes"
    ABFS = "abfs"
    S3 = "s3"
    ONELAKE = "onelake"

    @staticmethod
    def list() -> list[str]:
        """Returns the values of all location types, in definition order.

        Returns:
            list of str: A list of all the values of the LocationType enum.
        """
        return [member.value for member in LocationType]
|
|
File without changes
|