pycarlo 0.12.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycarlo might be problematic; see the advisory details for more information.

Files changed (48)
  1. pycarlo/__init__.py +0 -0
  2. pycarlo/common/__init__.py +31 -0
  3. pycarlo/common/errors.py +31 -0
  4. pycarlo/common/files.py +78 -0
  5. pycarlo/common/http.py +36 -0
  6. pycarlo/common/mcon.py +26 -0
  7. pycarlo/common/retries.py +129 -0
  8. pycarlo/common/settings.py +89 -0
  9. pycarlo/common/utils.py +51 -0
  10. pycarlo/core/__init__.py +10 -0
  11. pycarlo/core/client.py +267 -0
  12. pycarlo/core/endpoint.py +289 -0
  13. pycarlo/core/operations.py +25 -0
  14. pycarlo/core/session.py +127 -0
  15. pycarlo/features/__init__.py +10 -0
  16. pycarlo/features/circuit_breakers/__init__.py +3 -0
  17. pycarlo/features/circuit_breakers/exceptions.py +10 -0
  18. pycarlo/features/circuit_breakers/service.py +346 -0
  19. pycarlo/features/dbt/__init__.py +3 -0
  20. pycarlo/features/dbt/dbt_importer.py +208 -0
  21. pycarlo/features/dbt/queries.py +31 -0
  22. pycarlo/features/exceptions.py +18 -0
  23. pycarlo/features/metadata/__init__.py +32 -0
  24. pycarlo/features/metadata/asset_allow_block_list.py +22 -0
  25. pycarlo/features/metadata/asset_filters_container.py +79 -0
  26. pycarlo/features/metadata/base_allow_block_list.py +137 -0
  27. pycarlo/features/metadata/metadata_allow_block_list.py +94 -0
  28. pycarlo/features/metadata/metadata_filters_container.py +262 -0
  29. pycarlo/features/pii/__init__.py +5 -0
  30. pycarlo/features/pii/constants.py +3 -0
  31. pycarlo/features/pii/pii_filterer.py +179 -0
  32. pycarlo/features/pii/queries.py +20 -0
  33. pycarlo/features/pii/service.py +56 -0
  34. pycarlo/features/user/__init__.py +4 -0
  35. pycarlo/features/user/exceptions.py +10 -0
  36. pycarlo/features/user/models.py +9 -0
  37. pycarlo/features/user/queries.py +13 -0
  38. pycarlo/features/user/service.py +71 -0
  39. pycarlo/lib/README.md +35 -0
  40. pycarlo/lib/__init__.py +0 -0
  41. pycarlo/lib/schema.json +210020 -0
  42. pycarlo/lib/schema.py +82620 -0
  43. pycarlo/lib/types.py +68 -0
  44. pycarlo-0.12.24.dist-info/LICENSE +201 -0
  45. pycarlo-0.12.24.dist-info/METADATA +249 -0
  46. pycarlo-0.12.24.dist-info/RECORD +48 -0
  47. pycarlo-0.12.24.dist-info/WHEEL +5 -0
  48. pycarlo-0.12.24.dist-info/top_level.txt +1 -0
@@ -0,0 +1,208 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+ from typing import Callable, Dict, Optional, Union, cast
4
+ from uuid import UUID
5
+
6
+ from dataclasses_json import LetterCase, dataclass_json
7
+
8
+ from pycarlo.common import get_logger, http
9
+ from pycarlo.common.files import BytesFileReader, JsonFileReader, to_path
10
+ from pycarlo.common.settings import (
11
+ HEADER_MCD_TELEMETRY_REASON,
12
+ HEADER_MCD_TELEMETRY_SERVICE,
13
+ RequestReason,
14
+ )
15
+ from pycarlo.core import Client, Query
16
+ from pycarlo.features.dbt.queries import (
17
+ GET_DBT_UPLOAD_URL,
18
+ SEND_DBT_ARTIFACTS_EVENT,
19
+ )
20
+ from pycarlo.features.pii import PiiFilterer, PiiService
21
+ from pycarlo.features.user import UserService
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
class InvalidArtifactsException(Exception):
    """Raised when dbt artifact files are malformed or inconsistent with each other."""
28
+
29
+
30
class InvalidFileFormatException(Exception):
    """Raised when a provided file cannot be parsed in the expected format."""
32
+
33
+
34
@dataclass_json(letter_case=LetterCase.CAMEL)  # type: ignore
@dataclass
class DbtArtifacts:
    """File names of the uploaded dbt artifacts, serialized to camelCase JSON
    for the `sendDbtArtifactsEvent` mutation (see DbtImporter.import_run)."""

    manifest: str  # uploaded manifest file name (from _upload_artifact)
    run_results: str  # uploaded run results file name
    logs: Optional[str]  # uploaded logs file name, or None when no logs were provided
40
+
41
+
42
class DbtImporter:
    """
    Import dbt run artifacts to Monte Carlo
    """

    DEFAULT_PROJECT_NAME = "default-project"
    DEFAULT_JOB_NAME = "default-job"

    def __init__(
        self,
        mc_client: Optional[Client] = None,
        user_service: Optional[UserService] = None,
        pii_service: Optional[PiiService] = None,
        print_func: Callable = logger.info,
    ):
        """
        :param mc_client: Monte Carlo API client; a default-configured client is created
                          when not provided
        :param user_service: service used to resolve the Monte Carlo resource
        :param pii_service: service providing the PII filtering configuration
        :param print_func: callable used to report progress (defaults to logger.info)
        """
        self._mc_client = mc_client or Client()
        self._user_service = user_service or UserService(mc_client=self._mc_client)
        self._pii_service = pii_service or PiiService(mc_client=self._mc_client)
        self._print_func = print_func
        self._pii_filterer = self._init_pii_filterer()

    def import_run(
        self,
        manifest_path: Union[Path, str],
        run_results_path: Union[Path, str],
        logs_path: Optional[Union[Path, str]] = None,
        project_name: str = DEFAULT_PROJECT_NAME,
        job_name: str = DEFAULT_JOB_NAME,
        resource_id: Optional[Union[str, UUID]] = None,
    ):
        """
        Import artifacts from a single dbt command execution.

        :param manifest_path: local path to the dbt manifest file (manifest.json)
        :param run_results_path: local path to the dbt run results file (run_results.json)
        :param logs_path: local path to a file containing dbt run logs
        :param project_name: Project name (perhaps a logical group of dbt models, analogous to a
                             project in dbt Cloud)
        :param job_name: Job name (perhaps a logical sequence of dbt commands, analogous to a
                         job in dbt Cloud)
        :param resource_id: identifier of a Monte Carlo resource (warehouse or lake) to use to
                            resolve dbt models to tables, this will be required if you have more
                            than one
        :raises InvalidArtifactsException: if an artifact file is malformed, or the manifest
                                           and run results report different invocation ids
        """
        # get resource
        resource = self._user_service.get_resource(resource_id)

        # read local artifacts
        manifest = JsonFileReader(manifest_path).read()
        run_results = JsonFileReader(run_results_path).read()
        logs = BytesFileReader(logs_path).read() if logs_path else None

        # extract dbt invocation id (and verify it is the same for each artifact)
        invocation_id = self._get_invocation_id(
            manifest_path=manifest_path,
            manifest=manifest,
            run_results_path=run_results_path,
            run_results=run_results,
        )

        # upload artifacts to S3 (using pre-signed URLs)
        artifacts = DbtArtifacts(
            manifest=self._upload_artifact(
                project_name=project_name,
                invocation_id=invocation_id,
                file_path=to_path(manifest_path),
                content=manifest,
            ),
            run_results=self._upload_artifact(
                project_name=project_name,
                invocation_id=invocation_id,
                file_path=to_path(run_results_path),
                content=run_results,
            ),
            logs=self._upload_artifact(
                project_name=project_name,
                invocation_id=invocation_id,
                file_path=to_path(logs_path),  # type: ignore
                content=logs,
            )
            if logs
            else None,
        )

        # publish event indicating run artifacts are ready for processing
        self._mc_client(
            query=SEND_DBT_ARTIFACTS_EVENT,
            variables=dict(
                projectName=project_name,
                jobName=job_name,
                invocationId=invocation_id,
                artifacts=artifacts.to_dict(),  # type: ignore
                resourceId=str(resource.id),
            ),
            additional_headers={
                HEADER_MCD_TELEMETRY_REASON: RequestReason.SERVICE.value,
                HEADER_MCD_TELEMETRY_SERVICE: "dbt_importer",
            },
        )

        self._print_func("Finished sending run artifacts to Monte Carlo")

    def _get_invocation_id(
        self,
        manifest_path: Union[Path, str],
        manifest: Dict,
        run_results_path: Union[Path, str],
        run_results: Dict,
    ) -> str:
        """
        Extract the dbt invocation id shared by both artifact files.

        :raises InvalidArtifactsException: if the two files carry different invocation ids
        """
        manifest_invocation_id = self._extract_invocation_id(path=manifest_path, data=manifest)
        run_results_invocation_id = self._extract_invocation_id(
            path=run_results_path, data=run_results
        )

        if manifest_invocation_id != run_results_invocation_id:
            raise InvalidArtifactsException(
                "dbt invocation ids do not match between manifest and run results files"
            )

        return manifest_invocation_id

    @staticmethod
    def _extract_invocation_id(path: Union[Path, str], data: Dict) -> str:
        """Read `metadata.invocation_id` from a parsed dbt artifact file."""
        try:
            return data["metadata"]["invocation_id"]
        except KeyError as err:
            # chain the original KeyError so the missing key shows up in tracebacks
            raise InvalidArtifactsException(
                f"Unable to get dbt invocation id from '{path}'. Unexpected file format"
            ) from err

    def _upload_artifact(
        self,
        project_name: str,
        invocation_id: str,
        file_path: Path,
        content: Union[bytes, str, Dict],
    ) -> str:
        """
        Upload a single artifact via a pre-signed URL, after PII filtering.

        :return: the uploaded file's name
        """
        self._print_func(f"Uploading {file_path.name}...")
        http.upload(
            method="put",
            url=self._get_presigned_url(
                project_name=project_name, invocation_id=invocation_id, file_name=file_path.name
            ),
            content=self._pii_filterer.filter_content(content),
        )
        return file_path.name

    def _get_presigned_url(self, project_name: str, invocation_id: str, file_name: str) -> str:
        """Request a pre-signed upload URL for one artifact file."""
        response = cast(
            Query,
            self._mc_client(
                query=GET_DBT_UPLOAD_URL,
                variables=dict(
                    projectName=project_name, invocationId=invocation_id, fileName=file_name
                ),
                additional_headers={
                    HEADER_MCD_TELEMETRY_REASON: RequestReason.SERVICE.value,
                    HEADER_MCD_TELEMETRY_SERVICE: "dbt_importer",
                },
            ),
        )

        return cast(str, response.get_dbt_upload_url)

    def _init_pii_filterer(self):
        """Build a PiiFilterer from the account's active PII filter configuration."""
        pii_filters = self._pii_service.get_pii_filters_config()
        return PiiFilterer(filters_config=pii_filters)
@@ -0,0 +1,31 @@
1
# GraphQL query: returns a pre-signed URL for uploading one dbt artifact file.
GET_DBT_UPLOAD_URL = """
query getDbtUploadUrl(
  $projectName: String!,
  $invocationId: String!,
  $fileName: String!) {
  getDbtUploadUrl(
    projectName: $projectName,
    invocationId: $invocationId,
    fileName: $fileName
  )
}
"""

# GraphQL mutation: signals that all artifacts for a dbt invocation have been
# uploaded and are ready for processing (see DbtImporter.import_run).
SEND_DBT_ARTIFACTS_EVENT = """
mutation sendDbtArtifactsEvent(
  $projectName: String!,
  $jobName: String!,
  $invocationId: UUID!,
  $artifacts: DbtArtifactsInput!,
  $resourceId: UUID) {
  sendDbtArtifactsEvent(
    projectName: $projectName,
    jobName: $jobName,
    invocationId: $invocationId,
    artifacts: $artifacts,
    resourceId: $resourceId
  ) {
    ok
  }
}
"""
@@ -0,0 +1,18 @@
1
+ from pycarlo.features.circuit_breakers.exceptions import (
2
+ CircuitBreakerPipelineException,
3
+ CircuitBreakerPollException,
4
+ )
5
+ from pycarlo.features.user.exceptions import (
6
+ MultipleResourcesFoundException,
7
+ ResourceNotFoundException,
8
+ UserServiceException,
9
+ )
10
+
11
# Public re-exports of feature exceptions; keep in sync with the imports above.
# (Fixed: "CircuitBreakerPipelineException" was previously listed twice.)
__all__ = [
    "CircuitBreakerPipelineException",
    "CircuitBreakerPollException",
    "MultipleResourcesFoundException",
    "ResourceNotFoundException",
    "UserServiceException",
]
@@ -0,0 +1,32 @@
1
+ from pycarlo.features.metadata.asset_allow_block_list import AssetAllowBlockList
2
+ from pycarlo.features.metadata.asset_filters_container import AssetFiltersContainer
3
+ from pycarlo.features.metadata.base_allow_block_list import (
4
+ BaseAllowBlockList,
5
+ ComparisonType,
6
+ FilterEffectType,
7
+ FilterRule,
8
+ FilterType,
9
+ RuleEffect,
10
+ )
11
+ from pycarlo.features.metadata.metadata_allow_block_list import (
12
+ MetadataAllowBlockList,
13
+ MetadataFilter,
14
+ )
15
+ from pycarlo.features.metadata.metadata_filters_container import MetadataFiltersContainer
16
+
17
# Public API of pycarlo.features.metadata.
__all__ = [
    # Base classes
    "FilterRule",
    "BaseAllowBlockList",
    "FilterEffectType",
    "RuleEffect",
    "FilterType",
    "ComparisonType",
    # Metadata filtering classes
    "MetadataFilter",
    "MetadataAllowBlockList",
    "MetadataFiltersContainer",
    # Asset filtering classes
    "AssetAllowBlockList",
    "AssetFiltersContainer",
]
@@ -0,0 +1,22 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import List, Optional
3
+
4
+ from dataclasses_json import DataClassJsonMixin
5
+
6
+ from pycarlo.common import get_logger
7
+ from pycarlo.features.metadata.base_allow_block_list import BaseAllowBlockList, FilterRule
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
@dataclass
class AssetAllowBlockList(BaseAllowBlockList[FilterRule], DataClassJsonMixin):
    """Allow/block list of generic filter rules scoped to a single asset type."""

    # JSON deserialization fails without this ugly override
    rules: Optional[List[FilterRule]] = field(default_factory=list)

    # Required despite the default (validated in __post_init__); the default exists
    # only because parent dataclass fields already have defaults.
    asset_type: Optional[str] = None

    def __post_init__(self):
        # We can't remove the default value because of properties with defaults in the parent class.
        if not self.asset_type:
            raise ValueError("asset_type is required")
@@ -0,0 +1,79 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Dict, List
3
+
4
+ from dataclasses_json import DataClassJsonMixin
5
+
6
+ from .asset_allow_block_list import AssetAllowBlockList
7
+ from .base_allow_block_list import FilterEffectType
8
+
9
# Mapping of resource types to their supported asset types for collection preferences.
# This is used for validating asset collection preferences.
# When support for filtering an asset type is implemented in the DC, it should be added here.
# The reason it is here instead of in Monolith, is so that it can be referenced by the CLI.
# The pycarlo version in CLI and monolith should be updated after updating this and releasing a
# new version.
# Shape: {resource_type: {asset_type: [filterable attribute names]}}
ASSET_TYPE_ATTRIBUTES: Dict[str, Dict[str, List[str]]] = {"tableau": {"project": ["name"], "workbook": ["name", "luid"]}}
16
+
17
+
18
@dataclass
class AssetFiltersContainer(DataClassJsonMixin):
    """
    Simple container for asset filtering that focuses on in-memory filtering for REST APIs.

    This class provides basic asset filtering functionality without SQL generation complexity.
    It's designed for the initial phase where assets are collected via REST APIs rather than
    SQL queries.

    Example usage:
        # Block workbooks named "internal" (FilterRule effect defaults to BLOCK)
        block_list = AssetAllowBlockList(
            asset_type="workbook",
            rules=[FilterRule(conditions=[RuleCondition(attribute_name="name", value="internal")])],
        )
        container = AssetFiltersContainer(asset_filters=[block_list])

        # Check if an asset is blocked
        is_blocked = container.is_asset_blocked("workbook", {"name": "internal"})  # True
        is_blocked = container.is_asset_blocked("workbook", {"name": "public"})  # False
    """

    # All configured filter lists; each applies only to its own asset_type.
    asset_filters: List[AssetAllowBlockList] = field(default_factory=list)

    def is_asset_type_filtered(self, asset_type: str) -> bool:
        """Returns True if any filters are configured for the given asset type."""
        return bool(self._get_asset_filters(asset_type))

    def is_asset_blocked(self, asset_type: str, attributes: Dict[str, str]) -> bool:
        """
        Returns True if the specified asset is blocked by the current filters.

        Args:
            asset_type: The type of asset (e.g., 'tableau_workbook_v2', 'jobs', 'power_bi_workspace')
            attributes: A dictionary representing the attributes of the asset

        Returns:
            True if the asset is blocked, False if it's allowed

        Note:
            When several filter lists exist for the same asset type, each list's
            verdict overwrites the previous one, so the last matching list wins.
        """
        asset_filters = self._get_asset_filters(asset_type)

        is_blocked = False

        for asset_filter in asset_filters:
            # Rules carrying the list's default effect take precedence over the others.
            default_effect_matches = asset_filter.get_default_effect_rules(
                lambda f: f.matches(force_regexp=False, **attributes)
            )
            if default_effect_matches:
                is_blocked = asset_filter.default_effect == FilterEffectType.BLOCK
            else:
                # Otherwise check the rules carrying the opposite effect.
                other_effect_matches = asset_filter.get_other_effect_rules(
                    lambda f: f.matches(force_regexp=False, **attributes)
                )
                if other_effect_matches:
                    is_blocked = asset_filter.other_effect == FilterEffectType.BLOCK
                else:
                    # No matches, use default effect
                    is_blocked = asset_filter.default_effect == FilterEffectType.BLOCK

        return is_blocked

    def _get_asset_filters(self, asset_type: str) -> List[AssetAllowBlockList]:
        # Exact (case-sensitive) match on the filter list's asset_type.
        return [f for f in self.asset_filters if f.asset_type == asset_type]
@@ -0,0 +1,137 @@
1
+ import enum
2
+ import re
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Callable, Generic, List, Optional, TypeVar
5
+
6
+ from dataclasses_json import DataClassJsonMixin
7
+
8
+ from pycarlo.common import get_logger
9
+
10
+ logger = get_logger(__name__)
11
+
12
+ # For documentation and samples check the link below:
13
+ # https://www.notion.so/montecarlodata/Catalog-Schema-Filtering-59edd6eff7f74c94ab6bfca75d2e3ff1
14
+
15
+
16
def _exclude_none_values(value: Any) -> bool:
    """Return True when *value* is None.

    Intended as a dataclasses_json ``exclude`` predicate so None-valued fields
    are omitted from serialized JSON (not referenced in this module itself).
    """
    return value is None
18
+
19
+
20
class FilterEffectType(enum.Enum):
    """Effect a matching rule has on an item: block it or allow it."""

    BLOCK = "block"
    ALLOW = "allow"


# Backwards-compatible alias: rules share the same effect values as filters.
RuleEffect = FilterEffectType
26
+
27
+
28
class FilterType(enum.Enum):
    """How a condition's value is compared against an attribute (see FilterRule._match)."""

    EXACT_MATCH = "exact_match"
    PREFIX = "prefix"
    SUFFIX = "suffix"
    SUBSTRING = "substring"
    REGEXP = "regexp"


# Backwards-compatible alias used by RuleCondition.
ComparisonType = FilterType

# Type variable for the filter class
FilterRuleT = TypeVar("FilterRuleT", bound="FilterRule")
40
+
41
+
42
@dataclass
class RuleCondition(DataClassJsonMixin):
    """A single attribute comparison; FilterRule requires all of its conditions to match."""

    attribute_name: str  # name of the attribute this condition inspects
    value: str  # value to compare against (case-insensitively, see FilterRule._match)
    comparison_type: ComparisonType = ComparisonType.EXACT_MATCH
47
+
48
+
49
@dataclass
class FilterRule(DataClassJsonMixin):
    """
    Base class for all filter types. Provides common filtering logic that can be
    shared between different filter implementations (e.g., metadata filters, asset filters).
    """

    # Conditions are ANDed together: a rule matches only if every condition matches.
    conditions: Optional[List[RuleCondition]] = field(default_factory=list)
    effect: RuleEffect = RuleEffect.BLOCK

    def matches(self, force_regexp: bool = False, **kwargs: Any) -> bool:
        """
        Return True if every condition on this rule is satisfied by *kwargs*.

        Conditions whose attribute name is absent from *kwargs* are skipped
        (treated as matched). A kwargs value of None never satisfies a
        condition (see `_match`). A rule with no conditions matches anything.

        :param force_regexp: compare all conditions as regular expressions,
            regardless of their configured comparison type
        :raises ValueError: if *kwargs* is empty
        """
        if not kwargs:
            raise ValueError("At least one field needs to be specified for matching")

        # kwargs must match the field names in this class, if any of them do not,
        # invalidate the filter.
        try:
            return all(
                condition.attribute_name not in kwargs
                or self._match(
                    condition=condition,
                    value=kwargs.get(condition.attribute_name),
                    force_regexp=force_regexp,
                )
                for condition in self.conditions or []
            )
        except AttributeError:
            return False

    @classmethod
    def _match(cls, condition: RuleCondition, value: Optional[str], force_regexp: bool) -> bool:
        # A None value never satisfies a condition on that attribute.
        # NOTE(review): the original comment here claimed a None value "matches
        # everything", contradicting the `return False` below; documenting the
        # actual behavior instead.
        if value is None:
            return False

        # The comparison is performed case-insensitive (check BaseFilter._safe_match)
        # We can use LOWER here since it is part of standard SQL (like AND/OR/NOT), so including it
        # here is a way to make sure that all comparisons are case-insensitive in the SQL sentences
        # for all engines. Added option to not always LOWER since customers do have lower/upper case
        # databases logged in MC
        filter_value = condition.value.lower()
        value = value.lower()

        if force_regexp or condition.comparison_type == FilterType.REGEXP:
            # Anchor the pattern so it must match the whole value, not a prefix.
            regexp = f"^{filter_value}$"
            return re.match(regexp, value) is not None
        elif condition.comparison_type == FilterType.PREFIX:
            return value.startswith(filter_value)
        elif condition.comparison_type == FilterType.SUFFIX:
            return value.endswith(filter_value)
        elif condition.comparison_type == FilterType.SUBSTRING:
            return filter_value in value
        else:  # filter_type == FilterType.EXACT_MATCH
            return filter_value == value
108
+
109
+
110
@dataclass
class BaseAllowBlockList(Generic[FilterRuleT], DataClassJsonMixin):
    """
    Generic collection of allow/block rules with a default effect.

    Rules whose effect equals ``default_effect`` are the "default effect" rules;
    the remaining rules carry the opposite ("other") effect.
    """

    rules: Optional[List[FilterRuleT]] = field(default_factory=list)
    default_effect: RuleEffect = RuleEffect.ALLOW

    @property
    def other_effect(self) -> RuleEffect:
        """The effect opposite to ``default_effect``."""
        if self.default_effect == RuleEffect.BLOCK:
            return RuleEffect.ALLOW
        return RuleEffect.BLOCK

    def get_default_effect_rules(
        self, condition: Optional[Callable[[FilterRuleT], bool]] = None
    ) -> List[FilterRuleT]:
        """Rules carrying the default effect, optionally narrowed by *condition*."""
        return [
            rule
            for rule in self.rules or []
            if rule.effect == self.default_effect and (condition is None or condition(rule))
        ]

    def get_other_effect_rules(
        self, condition: Optional[Callable[[FilterRuleT], bool]] = None
    ) -> List[FilterRuleT]:
        """Rules carrying the non-default effect, optionally narrowed by *condition*."""
        return [
            rule
            for rule in self.rules or []
            if rule.effect != self.default_effect and (condition is None or condition(rule))
        ]
@@ -0,0 +1,94 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import List, Optional
3
+
4
+ from dataclasses_json import config, dataclass_json
5
+
6
+ from pycarlo.common import get_logger
7
+ from pycarlo.features.metadata.base_allow_block_list import (
8
+ BaseAllowBlockList,
9
+ ComparisonType,
10
+ FilterRule,
11
+ FilterType,
12
+ RuleCondition,
13
+ )
14
+
15
+ logger = get_logger(__name__)
16
+
17
+ # For documentation and samples check the link below:
18
+ # https://www.notion.so/montecarlodata/Catalog-Schema-Filtering-59edd6eff7f74c94ab6bfca75d2e3ff1
19
+
20
+
21
@dataclass_json
@dataclass
class MetadataFilter(FilterRule):
    """
    Filter rule for warehouse metadata expressed with legacy flat fields
    (project / dataset / table_name / table_type).

    On initialization the legacy fields are translated into generic
    `RuleCondition` objects so the shared `FilterRule.matches` logic applies.
    """

    # Comparison applied to the "target" field (see filter_type_target_field);
    # every other field is always compared with EXACT_MATCH.
    type: FilterType = FilterType.EXACT_MATCH

    # we're using exclude=... to prevent these properties from being serialized to json
    # when None, to keep the json doc simpler
    project: Optional[str] = field(metadata=config(exclude=lambda x: x is None), default=None)
    dataset: Optional[str] = field(metadata=config(exclude=lambda x: x is None), default=None)
    table_type: Optional[str] = field(metadata=config(exclude=lambda x: x is None), default=None)
    table_name: Optional[str] = field(metadata=config(exclude=lambda x: x is None), default=None)

    def __post_init__(self):
        # For backwards compatibility, build generic conditions from the
        # metadata-specific fields. Order is significant and preserved from the
        # original implementation: table_name, dataset, project, then table_type.
        self.conditions = self.conditions or []
        for attribute_name, value in (
            ("table_name", self.table_name),
            ("dataset", self.dataset),
            ("project", self.project),
        ):
            if value is not None:
                self._append_condition(attribute_name=attribute_name, value=value)

        if self.table_type is not None:
            # table_type is never the filter-type target: always exact match.
            self.conditions.append(
                RuleCondition(
                    comparison_type=ComparisonType.EXACT_MATCH,
                    attribute_name="table_type",
                    value=self.table_type,
                )
            )

    def _append_condition(self, attribute_name: str, value: str) -> None:
        """Append one condition, using `self.type` only for the filter-type target field."""
        is_target_field = self.filter_type_target_field() == attribute_name
        self.conditions.append(
            RuleCondition(
                comparison_type=self.type if is_target_field else ComparisonType.EXACT_MATCH,
                attribute_name=attribute_name,
                value=value,
            )
        )

    def filter_type_target_field(self) -> str:
        """
        The field that is evaluated using filter type. Other fields should be
        compared using exact match.
        """
        if self.table_name is not None:
            return "table_name"
        if self.dataset is not None:
            return "dataset"
        if self.project is not None:
            return "project"

        # logger.error, not logger.exception: there is no active exception here, and
        # logger.exception outside an `except` block logs a bogus "NoneType: None" traceback.
        logger.error("Invalid filter, missing target values")
        return ""
86
+
87
+
88
@dataclass_json
@dataclass
class MetadataAllowBlockList(BaseAllowBlockList[MetadataFilter]):
    """Allow/block list of metadata filters (legacy JSON field name: `filters`)."""

    # Kept as `filters` for JSON backwards compatibility; mirrored into `rules` below.
    filters: List[MetadataFilter] = field(default_factory=list)

    def __post_init__(self):
        # Base-class helpers operate on `rules`; alias it to the legacy `filters`
        # list (same list object, so mutations are visible through both names).
        self.rules = self.filters