pycarlo 0.10.210__py3-none-any.whl → 0.12.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pycarlo might be problematic. Click here for more details.
- pycarlo/features/metadata/__init__.py +20 -3
- pycarlo/features/metadata/asset_allow_block_list.py +22 -0
- pycarlo/features/metadata/asset_filters_container.py +79 -0
- pycarlo/features/metadata/base_allow_block_list.py +137 -0
- pycarlo/features/metadata/metadata_allow_block_list.py +94 -0
- pycarlo/features/metadata/metadata_filters_container.py +25 -16
- pycarlo/lib/README.md +34 -2
- pycarlo/lib/schema.json +63285 -50245
- pycarlo/lib/schema.py +6090 -1654
- pycarlo/lib/types.py +68 -0
- {pycarlo-0.10.210.dist-info → pycarlo-0.12.57.dist-info}/METADATA +107 -36
- {pycarlo-0.10.210.dist-info → pycarlo-0.12.57.dist-info}/RECORD +15 -11
- {pycarlo-0.10.210.dist-info → pycarlo-0.12.57.dist-info}/WHEEL +1 -1
- pycarlo/features/metadata/allow_block_list.py +0 -159
- {pycarlo-0.10.210.dist-info → pycarlo-0.12.57.dist-info}/LICENSE +0 -0
- {pycarlo-0.10.210.dist-info → pycarlo-0.12.57.dist-info}/top_level.txt +0 -0
pycarlo/lib/types.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom GraphQL types for the Monte Carlo Python Schema Library.
|
|
3
|
+
|
|
4
|
+
This module provides custom implementations of/replacements for sgqlc types that are used
|
|
5
|
+
in the auto-generated schema.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any, Optional, Union
|
|
10
|
+
|
|
11
|
+
import sgqlc.types
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Enum(sgqlc.types.Enum):
    """
    A backward-compatible GraphQL enum type that gracefully handles unknown values.

    Problem:
        When new enum values are added to the Monte Carlo GraphQL API, older SDK versions
        that don't have these values in their generated schema will crash with a ValueError
        when trying to deserialize API responses containing the new values.

    Solution:
        This custom Enum class returns unknown enum values as plain strings instead of
        raising an error. Since sgqlc enums are already represented as strings internally,
        this maintains full compatibility with existing code while preventing crashes.

    Behavior:
        - Known enum values: Returned as strings (same as sgqlc.types.Enum)
        - Unknown enum values: Returned as strings with a warning logged
        - All comparisons, collections, and operations work identically

    Example:
        # Previous Values for EntitlementTypes = ['SSO', 'MULTI_WORKSPACE']
        # API is updated to return new value: ['SSO', 'NEW_FEATURE', 'MULTI_WORKSPACE']

        # With standard sgqlc.types.Enum:
        # ValueError: EntitlementTypes does not accept value NEW_FEATURE

        # With this Enum:
        # Will return the new value as str and log a warning, no exception raised

        # Code still works:
        if 'SSO' in entitlements:  # Works
            enable_sso()
        if 'NEW_FEATURE' in entitlements:  # Also works
            enable_new_feature()
    """

    def __new__(
        cls, json_data: Any, _: Optional[Any] = None
    ) -> Union[str, sgqlc.types.Variable, None]:
        """
        Convert ``json_data`` into an enum value without rejecting unknown values.

        :param json_data: raw value to convert.
        :param _: accepted and ignored.
            NOTE(review): presumably kept so the call signature stays compatible with
            ``sgqlc.types.Enum`` - confirm.
        :return: whatever ``sgqlc.types.get_variable_or_none`` returns when it accepts
            ``json_data``; otherwise ``str(json_data)``, whether or not the value is a
            declared member of this enum.
        """
        try:
            return sgqlc.types.get_variable_or_none(json_data)
        except ValueError:
            # Not handled by get_variable_or_none: treat it as a plain enum value below.
            pass

        if json_data not in cls:
            # Log a warning but don't crash - the unknown value is still returned as a string.
            # Lazy %-style arguments defer formatting until a handler actually emits the record.
            logger.warning(
                "Unknown enum value '%s' for %s. "
                "This may indicate the SDK is out of date. Returning raw string value.",
                json_data,
                cls.__name__,
            )

        # Known and unknown values share the same string representation.
        return str(json_data)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pycarlo
|
|
3
|
-
Version: 0.10.210
|
|
3
|
+
Version: 0.12.57
|
|
4
4
|
Summary: Monte Carlo's Python SDK
|
|
5
5
|
Home-page: https://www.montecarlodata.com/
|
|
6
6
|
Author: Monte Carlo Data, Inc
|
|
@@ -19,15 +19,13 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Requires-Python: >=3.8
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
21
|
License-File: LICENSE
|
|
22
|
-
Requires-Dist: dataclasses-json
|
|
23
|
-
Requires-Dist: python-box
|
|
24
|
-
Requires-Dist: requests
|
|
25
|
-
Requires-Dist: responses
|
|
26
|
-
Requires-Dist: sgqlc
|
|
22
|
+
Requires-Dist: dataclasses-json<6.0.0,>=0.5.7
|
|
23
|
+
Requires-Dist: python-box>=5.0.0
|
|
24
|
+
Requires-Dist: requests<3.0.0,>=2.0.0
|
|
25
|
+
Requires-Dist: responses>=0.20.0
|
|
26
|
+
Requires-Dist: sgqlc<17.0,>=14.1
|
|
27
27
|
|
|
28
|
-
# Pycarlo
|
|
29
|
-
|
|
30
|
-
Monte Carlo's Alpha Python SDK!
|
|
28
|
+
# Pycarlo - Monte Carlo's Python SDK
|
|
31
29
|
|
|
32
30
|
## Installation
|
|
33
31
|
|
|
@@ -40,23 +38,22 @@ virtualenv venv
|
|
|
40
38
|
pip install -U pycarlo
|
|
41
39
|
```
|
|
42
40
|
|
|
43
|
-
Developers of the SDK can use:
|
|
44
|
-
|
|
45
|
-
```shell
|
|
46
|
-
make install-with-tests
|
|
47
|
-
. venv/bin/activate
|
|
48
|
-
pre-commit install
|
|
49
|
-
```
|
|
50
|
-
|
|
51
41
|
## Overview
|
|
52
42
|
|
|
53
43
|
Pycarlo comprises two components: `core` and `features`.
|
|
54
44
|
|
|
55
|
-
All Monte Carlo API queries and mutations that you could execute via the API are supported via the
|
|
45
|
+
All Monte Carlo API queries and mutations that you could execute via the API are supported via the
|
|
46
|
+
`core` library. Operations can be executed as first class objects, using
|
|
47
|
+
[sgqlc](https://github.com/profusion/sgqlc), or as raw GQL with variables. In both cases, a
|
|
48
|
+
consistent object where fields can be referenced by dot notation and the more pythonic snake_case is
|
|
49
|
+
returned for ease of use.
|
|
56
50
|
|
|
57
|
-
The `features` library provides additional convenience for performing common operations like with
|
|
51
|
+
The `features` library provides additional convenience for common operations, such as working with
|
|
52
|
+
dbt, circuit breaking, and PII filtering.
|
|
58
53
|
|
|
59
|
-
Note that an API Key is required to use the SDK. See
|
|
54
|
+
Note that an API Key is required to use the SDK. See
|
|
55
|
+
[our docs on generating API keys](https://docs.getmontecarlo.com/docs/developer-resources#creating-an-api-key)
|
|
56
|
+
for details.
|
|
60
57
|
|
|
61
58
|
## Basic usage
|
|
62
59
|
|
|
@@ -144,13 +141,78 @@ print(client(mutation))
|
|
|
144
141
|
# ]
|
|
145
142
|
```
|
|
146
143
|
|
|
147
|
-
|
|
144
|
+
### Examples
|
|
148
145
|
|
|
149
|
-
|
|
146
|
+
#### Circuit Breaker Example
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from pycarlo.core import Client, Session
|
|
150
|
+
from pycarlo.features.circuit_breakers import CircuitBreakerService
|
|
151
|
+
|
|
152
|
+
# Example from our test.snowflake account.
|
|
153
|
+
endpoint = "https://api.dev.getmontecarlo.com/graphql"
|
|
154
|
+
|
|
155
|
+
service = CircuitBreakerService(
|
|
156
|
+
mc_client=Client(Session(mcd_profile="test-snow", endpoint=endpoint)), print_func=print
|
|
157
|
+
)
|
|
158
|
+
in_breach = service.trigger_and_poll(rule_uuid="87872875-fe80-4963-8ab0-c04397a6daae")
|
|
159
|
+
print("That can't be good. Our warehouse is broken." if in_breach else "Go, go, go!.")
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
#### Insight Upload Example
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
from pathlib import Path
|
|
166
|
+
|
|
167
|
+
import boto3
|
|
168
|
+
import requests
|
|
169
|
+
|
|
170
|
+
from pycarlo.core import Client, Query
|
|
171
|
+
|
|
172
|
+
MC_CLIENT = Client()
|
|
173
|
+
S3_CLIENT = boto3.client("s3")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def upload_insights_to_s3(
    destination_bucket: str,
    desired_file_extension: str = ".csv",
) -> None:
    """
    Example function for listing all insights in an account, and uploading any available
    to S3 as a CSV.

    :param destination_bucket: name of the S3 bucket that receives the reports.
    :param desired_file_extension: suffix applied to each insight name to build both the
        requested report name and the S3 object key.
    :raises requests.HTTPError: if downloading a report returns an error status.
    """
    list_insights_query = Query()
    list_insights_query.get_insights()
    for insight in MC_CLIENT(list_insights_query).get_insights:
        if not insight.available:
            continue

        report_name = str(Path(insight.name).with_suffix(desired_file_extension))

        report_url_query = Query()
        report_url_query.get_report_url(insight_name=insight.name, report_name=report_name)
        report_url = MC_CLIENT(report_url_query).get_report_url.url

        print(f"Uploading {report_name} to {destination_bucket}.")
        # The context manager releases the streamed connection once the upload finishes.
        with requests.get(url=report_url, stream=True) as response:
            # Fail loudly instead of uploading an HTTP error body as if it were the report.
            response.raise_for_status()
            S3_CLIENT.upload_fileobj(
                Fileobj=response.raw,
                Bucket=destination_bucket,
                Key=report_name,
            )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
if __name__ == "__main__":
|
|
203
|
+
upload_insights_to_s3(destination_bucket="<BUCKET-NAME>")
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
See [Monte Carlo's API reference](https://apidocs.getmontecarlo.com/) for all supported queries and
|
|
207
|
+
mutations.
|
|
208
|
+
|
|
209
|
+
For details and additional examples on how to map (convert) GraphQL queries to `sgqlc` operations
|
|
210
|
+
please refer to [the sgqlc docs](https://sgqlc.readthedocs.io/en/latest/sgqlc.operation.html).
|
|
150
211
|
|
|
151
212
|
### Features
|
|
152
213
|
|
|
153
|
-
You can use [pydoc](https://docs.python.org/
|
|
214
|
+
You can use [pydoc](https://docs.python.org/library/pydoc.html) to retrieve documentation on any
|
|
215
|
+
feature packages (`pydoc pycarlo.features`).
|
|
154
216
|
|
|
155
217
|
For instance for [circuit breakers](https://docs.getmontecarlo.com/docs/circuit-breakers):
|
|
156
218
|
|
|
@@ -160,9 +222,13 @@ pydoc pycarlo.features.circuit_breakers.service
|
|
|
160
222
|
|
|
161
223
|
## Session configuration
|
|
162
224
|
|
|
163
|
-
By default, when creating a client the `default` profile from `~/.mcd/profiles.ini` is used. This
|
|
225
|
+
By default, when creating a client the `default` profile from `~/.mcd/profiles.ini` is used. This
|
|
226
|
+
file is created via
|
|
227
|
+
[montecarlo configure](https://docs.getmontecarlo.com/docs/using-the-cli#setting-up-the-cli) on the
|
|
228
|
+
CLI. See [Monte Carlo's CLI reference](https://clidocs.getmontecarlo.com/) for more details.
|
|
164
229
|
|
|
165
|
-
You can override this usage by creating a custom `Session`. For instance, if you want to pass the ID
|
|
230
|
+
You can override this usage by creating a custom `Session`. For instance, if you want to pass the ID
|
|
231
|
+
and Token:
|
|
166
232
|
|
|
167
233
|
```python
|
|
168
234
|
from pycarlo.core import Client, Session
|
|
@@ -174,7 +240,8 @@ Sessions support the following params:
|
|
|
174
240
|
|
|
175
241
|
- `mcd_id`: API Key ID.
|
|
176
242
|
- `mcd_token`: API secret.
|
|
177
|
-
- `mcd_profile`: Named profile containing credentials. This is created via the CLI (e.g.
|
|
243
|
+
- `mcd_profile`: Named profile containing credentials. This is created via the CLI (e.g.
|
|
244
|
+
`montecarlo configure --profile-name zeus`).
|
|
178
245
|
- `mcd_config_path`: Path to file containing credentials. Defaults to `~/.mcd/`.
|
|
179
246
|
|
|
180
247
|
You can also specify the API Key, secret or profile name using the following environment variables:
|
|
@@ -183,7 +250,8 @@ You can also specify the API Key, secret or profile name using the following env
|
|
|
183
250
|
- `MCD_DEFAULT_API_TOKEN`
|
|
184
251
|
- `MCD_DEFAULT_PROFILE`
|
|
185
252
|
|
|
186
|
-
When creating a session any explicitly passed `mcd_id` and `mcd_token` params take precedence,
|
|
253
|
+
When creating a session any explicitly passed `mcd_id` and `mcd_token` params take precedence,
|
|
254
|
+
followed by environment variables and then any config-file options.
|
|
187
255
|
|
|
188
256
|
Environment variables can be mixed with passed credentials, but not the config-file profile.
|
|
189
257
|
|
|
@@ -191,9 +259,11 @@ Environment variables can be mixed with passed credentials, but not the config-f
|
|
|
191
259
|
|
|
192
260
|
## Integration Gateway API
|
|
193
261
|
|
|
194
|
-
There are features that require the Integration Gateway API instead of the regular GraphQL
|
|
262
|
+
There are features that require the Integration Gateway API instead of the regular GraphQL
|
|
263
|
+
Application API, for example Airflow Callbacks invoked by the `airflow-mcd` library.
|
|
195
264
|
|
|
196
|
-
To use the Gateway you need to initialize the `Session` object passing a `scope` parameter and then
|
|
265
|
+
To use the Gateway you need to initialize the `Session` object passing a `scope` parameter and then
|
|
266
|
+
use `make_request` to invoke Gateway endpoints:
|
|
197
267
|
|
|
198
268
|
```python
|
|
199
269
|
from pycarlo.core import Client, Session
|
|
@@ -211,17 +281,18 @@ The following values also be set by the environment:
|
|
|
211
281
|
- `MCD_VERBOSE_ERRORS`: Enable logging. This includes a trace ID for each session and request.
|
|
212
282
|
- `MCD_API_ENDPOINT`: Customize the endpoint where queries and mutations are executed.
|
|
213
283
|
|
|
214
|
-
##
|
|
215
|
-
|
|
216
|
-
To update queries and mutations via introspection, use `make generate`.
|
|
284
|
+
## Enum Backward Compatibility
|
|
217
285
|
|
|
218
|
-
|
|
286
|
+
Unlike the baseline `sgqlc` behavior, this SDK is designed to maintain backward compatibility when
|
|
287
|
+
new enum values are added to the Monte Carlo API. If the API returns an enum value that doesn't
|
|
288
|
+
exist in your SDK version, it will be returned as a string with a warning logged, rather than
|
|
289
|
+
raising an error. This allows older SDK versions to continue working when new features are added.
|
|
219
290
|
|
|
220
|
-
|
|
291
|
+
To avoid warnings and ensure full feature support, keep your SDK updated to the latest version.
|
|
221
292
|
|
|
222
293
|
## References
|
|
223
294
|
|
|
224
|
-
-
|
|
295
|
+
- Monte Carlo App: <https://getmontecarlo.com>
|
|
225
296
|
- Product docs: <https://docs.getmontecarlo.com>
|
|
226
297
|
- Status page: <https://status.getmontecarlo.com>
|
|
227
298
|
- API (and SDK): <https://apidocs.getmontecarlo.com>
|
|
@@ -229,4 +300,4 @@ When ready to release, create a new [Github release](https://docs.github.com/en/
|
|
|
229
300
|
|
|
230
301
|
## License
|
|
231
302
|
|
|
232
|
-
Apache 2.0 - See the [
|
|
303
|
+
Apache 2.0 - See the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) for more information.
|
|
@@ -20,9 +20,12 @@ pycarlo/features/circuit_breakers/service.py,sha256=TljwMOhA5igBumkpJwM22iEJGyvt
|
|
|
20
20
|
pycarlo/features/dbt/__init__.py,sha256=A2cFr8_aSY_kDw1m7jR6QkHfiBMC1cZ6O8WosF9XrRg,85
|
|
21
21
|
pycarlo/features/dbt/dbt_importer.py,sha256=eJb9Jiu7tAEb_xsLO-ycDOjjVm-LLfTtbAzlzIRxT5I,7328
|
|
22
22
|
pycarlo/features/dbt/queries.py,sha256=9o1HevRECYyGXQ0lG0LrN4iuuXCS-SWjz7NTgAAZIro,621
|
|
23
|
-
pycarlo/features/metadata/__init__.py,sha256=
|
|
24
|
-
pycarlo/features/metadata/
|
|
25
|
-
pycarlo/features/metadata/
|
|
23
|
+
pycarlo/features/metadata/__init__.py,sha256=0RDVHnwPvQcNXkeXEAxL4VJ8VWrl2P0fft_Kl2nlo7I,912
|
|
24
|
+
pycarlo/features/metadata/asset_allow_block_list.py,sha256=jXCS7HtUJhexEXZyRzyN4MT-BPSaMTKWCWiBT_l-Ijo,761
|
|
25
|
+
pycarlo/features/metadata/asset_filters_container.py,sha256=O15SC6u7HMGlViYnX7H8MRFttQrIXQd3a0oOrz7My5U,3411
|
|
26
|
+
pycarlo/features/metadata/base_allow_block_list.py,sha256=c8zd0BXkNSfQ3LkNC_A0KrO2AsYtIowc8FjbdyDyDu0,4738
|
|
27
|
+
pycarlo/features/metadata/metadata_allow_block_list.py,sha256=HzgXE0WhwDZoBHeV04e8dwe88rUSVVnOl8nFEPlb0jA,3496
|
|
28
|
+
pycarlo/features/metadata/metadata_filters_container.py,sha256=p7FNg71KYZZrvgJmlx9rID2TAL_e9LKzNQZ9KKN5uGs,12812
|
|
26
29
|
pycarlo/features/pii/__init__.py,sha256=w5X-oD8HWaL6fP2jt40AhlXO-MNzlVAlhRaZ5kQqAZY,247
|
|
27
30
|
pycarlo/features/pii/constants.py,sha256=XWeiikXk9AtljdWsGfl49b9zI6w8EzK8F__Euc0vQ3w,70
|
|
28
31
|
pycarlo/features/pii/pii_filterer.py,sha256=k53b_V_mddY4A17-DJ5vQKszFDlUaiP3E650JFHAeJA,6209
|
|
@@ -33,12 +36,13 @@ pycarlo/features/user/exceptions.py,sha256=Za5mPMynNDW_UQkfMbCSGjP1ht-xSUkwoI6hV
|
|
|
33
36
|
pycarlo/features/user/models.py,sha256=fhvS7tBhTtx7p9624yN5tzebXs5ERrl1XQ9B_DYor0E,126
|
|
34
37
|
pycarlo/features/user/queries.py,sha256=m97RvM0oiBlrU5xmOwe_JJ5N0G0NG5hIOeyQqN2O8_4,170
|
|
35
38
|
pycarlo/features/user/service.py,sha256=DHkhuonySaHro07NTd0YNe3cNkDk62CiRTY77dhVaMs,2890
|
|
36
|
-
pycarlo/lib/README.md,sha256=
|
|
39
|
+
pycarlo/lib/README.md,sha256=CVVrPPgje7pkXNNsPvwLSeUOm5aktb22MlttmoxX08k,1677
|
|
37
40
|
pycarlo/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
pycarlo/lib/schema.json,sha256=
|
|
39
|
-
pycarlo/lib/schema.py,sha256=
|
|
40
|
-
pycarlo
|
|
41
|
-
pycarlo-0.
|
|
42
|
-
pycarlo-0.
|
|
43
|
-
pycarlo-0.
|
|
44
|
-
pycarlo-0.
|
|
41
|
+
pycarlo/lib/schema.json,sha256=s0_ck5vcAbkjeyxzn8CZ0KYCr--Czk7DJQ-jansG3J8,6776875
|
|
42
|
+
pycarlo/lib/schema.py,sha256=aqS4QkN3JxpCwEO3DY13VHU2DhR1snlST3RQVI0MGrw,2931179
|
|
43
|
+
pycarlo/lib/types.py,sha256=lGOrm5Qm-SieDAkOkVOFSgyUJYGOjKnea961AD9Dv6s,2404
|
|
44
|
+
pycarlo-0.12.57.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
45
|
+
pycarlo-0.12.57.dist-info/METADATA,sha256=kqg28PY9BLtXGA8s4lK5ywODUCyd3lpEsr99-pSVjMY,10431
|
|
46
|
+
pycarlo-0.12.57.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
47
|
+
pycarlo-0.12.57.dist-info/top_level.txt,sha256=TIE04H4pgzGaFxAB-gvkmVAUOAoHxxFfhnEcpuQ5bF4,8
|
|
48
|
+
pycarlo-0.12.57.dist-info/RECORD,,
|
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
import enum
|
|
2
|
-
import re
|
|
3
|
-
from dataclasses import dataclass, field
|
|
4
|
-
from typing import Any, Callable, List, Optional
|
|
5
|
-
|
|
6
|
-
from dataclasses_json import config, dataclass_json
|
|
7
|
-
|
|
8
|
-
from pycarlo.common import get_logger
|
|
9
|
-
|
|
10
|
-
logger = get_logger(__name__)
|
|
11
|
-
|
|
12
|
-
# For documentation and samples check the link below:
|
|
13
|
-
# https://www.notion.so/montecarlodata/Catalog-Schema-Filtering-59edd6eff7f74c94ab6bfca75d2e3ff1
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _exclude_none_values(value: Any) -> bool:
|
|
17
|
-
return value is None
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class FilterEffectType(enum.Enum):
    """
    Effect carried by a filter (``MetadataFilter.effect``) and used as the list-wide
    default (``AllowBlockList.default_effect``).
    """

    # NOTE(review): presumably "block" excludes matching assets and "allow" keeps them;
    # the code consuming these effects is not in this module - confirm.
    BLOCK = "block"
    ALLOW = "allow"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class FilterType(enum.Enum):
    """
    Comparison strategy applied by ``MetadataFilter._safe_match`` between a filter's
    configured text and a candidate value. Both sides are lowercased before comparing.
    """

    # The candidate must equal the configured text.
    EXACT_MATCH = "exact_match"
    # The candidate must start with the configured text.
    PREFIX = "prefix"
    # The candidate must end with the configured text.
    SUFFIX = "suffix"
    # The configured text must occur anywhere inside the candidate.
    SUBSTRING = "substring"
    # The configured text is treated as a regular expression.
    REGEXP = "regexp"
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
@dataclass_json
@dataclass
class MetadataFilter:
    """
    A single allow/block rule over project, dataset, table type and table name.

    ``type`` selects how the most specific configured field is compared (see
    ``filter_type_target_field``); every other configured field is compared by exact match.
    All comparisons are case-insensitive.
    """

    # we're using exclude=_exclude_none_values to prevent these properties to be serialized to json
    # when None, to keep the json doc simpler
    project: Optional[str] = field(metadata=config(exclude=_exclude_none_values), default=None)
    dataset: Optional[str] = field(metadata=config(exclude=_exclude_none_values), default=None)
    table_type: Optional[str] = field(metadata=config(exclude=_exclude_none_values), default=None)
    table_name: Optional[str] = field(metadata=config(exclude=_exclude_none_values), default=None)
    type: FilterType = FilterType.EXACT_MATCH
    effect: FilterEffectType = FilterEffectType.BLOCK

    def matches(self, force_regexp: bool = False, **kwargs: Any) -> bool:
        """
        Returns True if all properties specified in kwargs match the conditions specified in
        properties of the same name in this object.
        Supported keys in kwargs: 'project', 'dataset', 'table_name', 'table_type'
        For example kwargs={'project': 'prj_1'} will evaluate if 'prj_1' matches the condition in
        self.project. For kwargs={'project': 'prj_1', 'dataset': 'ds_1'} will evaluate if 'prj_1'
        matches the condition in self.project and if 'ds_1' matches the condition in self.dataset.
        If any of the conditions (for example self.project) is None, that condition will be matched.

        :param force_regexp: when True, every configured field is evaluated as a regular
            expression regardless of ``self.type``.
        :param kwargs: candidate values keyed by field name.
        :return: True when every supplied value satisfies its condition. False otherwise,
            including when a key is not an attribute of this class.
        :raises ValueError: if no kwargs are supplied.
        """
        if not kwargs:
            raise ValueError("At least one field needs to be specified for matching")

        # Only this field honours self.type; it does not depend on kwargs, so resolve it once.
        target_field = self.filter_type_target_field()

        # kwargs must match the field names in this class, if any of them do not,
        # invalidate the filter.
        try:
            is_match = all(
                self._safe_match(
                    component=getattr(self, component),
                    value=value,
                    force_regexp=force_regexp,
                    filter_type=self.type if target_field == component else FilterType.EXACT_MATCH,
                )
                for component, value in kwargs.items()
            )
        except AttributeError:
            is_match = False

        return is_match

    def filter_type_target_field(self) -> str:
        """
        The field that is evaluated using filter type. Other fields should be
        compared using exact match.

        :return: the most specific configured field among ``table_name``, ``dataset`` and
            ``project`` (checked in that order), or ``""`` when none of them is set.
        """
        if self.table_name is not None:
            return "table_name"
        if self.dataset is not None:
            return "dataset"
        if self.project is not None:
            return "project"

        # No exception is being handled here, so use error(): exception() would append a
        # meaningless "NoneType: None" traceback to the log record.
        logger.error("Invalid filter, missing target values")
        return ""

    @classmethod
    def _safe_match(
        cls,
        component: Optional[str],
        value: Optional[str],
        force_regexp: bool,
        filter_type: FilterType,
    ) -> bool:
        """
        Compare one candidate value against one configured field.

        :param component: text configured on this filter for the field, or None.
        :param value: candidate value supplied by the caller, or None.
        :param force_regexp: evaluate ``component`` as a regular expression regardless of
            ``filter_type``.
        :param filter_type: comparison strategy to apply.
        :return: whether ``value`` satisfies the condition.
        """
        # Field not specified on this object, e.g. self.dataset=None, which matches everything
        if component is None:
            return True
        # The value in kwargs is empty, it does not match the condition.
        if value is None:
            return False

        # Convert it in lowercase. In the normalizer we are converting identifiers
        # (like project/dataset) to lowercase so the metadata filters may be defined with
        # lowercase on the UI, however on Snowflake the identifiers are usually in uppercase.
        # Therefore, we perform the evaluation case-insensitive.
        # NOTE(review): lowercasing a regular expression also lowercases escapes such as
        # \D or \S, inverting their meaning - confirm whether patterns should instead be
        # matched with re.IGNORECASE.
        component = component.lower()
        value = value.lower()

        if force_regexp or filter_type == FilterType.REGEXP:
            # Require the whole value to match, to be strict about what is accepted.
            # fullmatch anchors the pattern as a unit: wrapping it as f"^{component}$" would
            # turn a top-level alternation "a|b" into "^a|b$" and would also accept a
            # trailing newline in the value.
            return re.fullmatch(component, value) is not None
        elif filter_type == FilterType.PREFIX:
            return value.startswith(component)
        elif filter_type == FilterType.SUFFIX:
            return value.endswith(component)
        elif filter_type == FilterType.SUBSTRING:
            return component in value
        else:
            return component == value
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
@dataclass_json
@dataclass
class AllowBlockList:
    """
    A list of ``MetadataFilter`` rules together with a list-wide default effect.

    Filters are split into those sharing ``default_effect`` and those carrying the
    opposite effect.
    """

    filters: List[MetadataFilter] = field(default_factory=list)
    default_effect: FilterEffectType = FilterEffectType.ALLOW

    @property
    def other_effect(self) -> FilterEffectType:
        """The effect that is not ``default_effect``."""
        if self.default_effect == FilterEffectType.BLOCK:
            return FilterEffectType.ALLOW
        return FilterEffectType.BLOCK

    def get_default_effect_filters(
        self, condition: Optional[Callable[[MetadataFilter], bool]] = None
    ) -> List[MetadataFilter]:
        """
        Return the filters whose effect equals ``default_effect``.

        :param condition: optional extra predicate; when given, only filters for which it
            returns a truthy value are kept.
        :return: a new list, in the same order as ``self.filters``.
        """
        return [
            candidate
            for candidate in self.filters
            if candidate.effect == self.default_effect
            and (condition is None or condition(candidate))
        ]

    def get_other_effect_filters(
        self, condition: Optional[Callable[[MetadataFilter], bool]] = None
    ) -> List[MetadataFilter]:
        """
        Return the filters whose effect differs from ``default_effect``.

        :param condition: optional extra predicate; when given, only filters for which it
            returns a truthy value are kept.
        :return: a new list, in the same order as ``self.filters``.
        """
        return [
            candidate
            for candidate in self.filters
            if candidate.effect != self.default_effect
            and (condition is None or condition(candidate))
        ]
|
|
File without changes
|
|
File without changes
|