pycarlo 0.12.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pycarlo might be problematic. Click here for more details.

Files changed (48) hide show
  1. pycarlo/__init__.py +0 -0
  2. pycarlo/common/__init__.py +31 -0
  3. pycarlo/common/errors.py +31 -0
  4. pycarlo/common/files.py +78 -0
  5. pycarlo/common/http.py +36 -0
  6. pycarlo/common/mcon.py +26 -0
  7. pycarlo/common/retries.py +129 -0
  8. pycarlo/common/settings.py +89 -0
  9. pycarlo/common/utils.py +51 -0
  10. pycarlo/core/__init__.py +10 -0
  11. pycarlo/core/client.py +267 -0
  12. pycarlo/core/endpoint.py +289 -0
  13. pycarlo/core/operations.py +25 -0
  14. pycarlo/core/session.py +127 -0
  15. pycarlo/features/__init__.py +10 -0
  16. pycarlo/features/circuit_breakers/__init__.py +3 -0
  17. pycarlo/features/circuit_breakers/exceptions.py +10 -0
  18. pycarlo/features/circuit_breakers/service.py +346 -0
  19. pycarlo/features/dbt/__init__.py +3 -0
  20. pycarlo/features/dbt/dbt_importer.py +208 -0
  21. pycarlo/features/dbt/queries.py +31 -0
  22. pycarlo/features/exceptions.py +18 -0
  23. pycarlo/features/metadata/__init__.py +32 -0
  24. pycarlo/features/metadata/asset_allow_block_list.py +22 -0
  25. pycarlo/features/metadata/asset_filters_container.py +79 -0
  26. pycarlo/features/metadata/base_allow_block_list.py +137 -0
  27. pycarlo/features/metadata/metadata_allow_block_list.py +94 -0
  28. pycarlo/features/metadata/metadata_filters_container.py +262 -0
  29. pycarlo/features/pii/__init__.py +5 -0
  30. pycarlo/features/pii/constants.py +3 -0
  31. pycarlo/features/pii/pii_filterer.py +179 -0
  32. pycarlo/features/pii/queries.py +20 -0
  33. pycarlo/features/pii/service.py +56 -0
  34. pycarlo/features/user/__init__.py +4 -0
  35. pycarlo/features/user/exceptions.py +10 -0
  36. pycarlo/features/user/models.py +9 -0
  37. pycarlo/features/user/queries.py +13 -0
  38. pycarlo/features/user/service.py +71 -0
  39. pycarlo/lib/README.md +35 -0
  40. pycarlo/lib/__init__.py +0 -0
  41. pycarlo/lib/schema.json +210020 -0
  42. pycarlo/lib/schema.py +82620 -0
  43. pycarlo/lib/types.py +68 -0
  44. pycarlo-0.12.24.dist-info/LICENSE +201 -0
  45. pycarlo-0.12.24.dist-info/METADATA +249 -0
  46. pycarlo-0.12.24.dist-info/RECORD +48 -0
  47. pycarlo-0.12.24.dist-info/WHEEL +5 -0
  48. pycarlo-0.12.24.dist-info/top_level.txt +1 -0
@@ -0,0 +1,262 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Any, Callable, Dict, Optional
3
+
4
+ from dataclasses_json import dataclass_json
5
+
6
+ from pycarlo.features.metadata import (
7
+ FilterEffectType,
8
+ FilterType,
9
+ MetadataAllowBlockList,
10
+ MetadataFilter,
11
+ )
12
+
13
+
14
@dataclass_json
@dataclass
class MetadataFiltersContainer:
    """
    More documentation and samples in the link below:
    https://www.notion.so/montecarlodata/Catalog-Schema-Filtering-59edd6eff7f74c94ab6bfca75d2e3ff1

    MetadataFiltersContainer class that includes a metadata_filters list that works
    in the following way:
    A list of filters where:
    - each filter can be a block or allow filter
    - each filter can optionally filter: project, dataset, table and table_type
    - each filter matches using one of the following types: exact match, regular expression, prefix
    - there's a default effect (allow/block) configured in the list, those elements with no matching
      filter will be resolved with the default effect in the list.
    - This class supports filtering objects in memory or generating SQL conditions for filtering,
      for SQL generation an encoder function is required that maps the different filter types to
      SQL, also a dictionary mapping from property name to column name is required (for example to
      map 'project' to 'database' or 'dataset' to 'schema').
    - The order in which elements are added to the list is not relevant, priority is assigned based
      on the effect, rules with the default effect have the higher priority and stop the iteration.

    Filtering works by prioritizing "explicit" filters, a filter is considered explicit when it is
    configured with the same effect used as the default in the list.
    Let's suppose we have the following sample data:
    - prj_1: ds_1, ds_2
    - prj_2: ds_1, ds_2, ds_3
    - project_3: dataset_1, ds_2
    - project_4: dataset_4

    These are examples using the sample data above:
    - list(default=allow): block(prj_*), allow(prj_1)
      - will allow prj_1 and all those projects not matching prj_*
      - allowed: (prj_1, all datasets), (project_3, all datasets), (project_4, all datasets)
    - list(default=block): allow(prj_*), block(prj_1)
      - will allow only prj_* except prj_1 that is explicitly blocked
      - allowed: (prj_2, all datasets)
    - list(default=allow): allow(*), block(prj_1)
      - will allow everything, allow(*) is considered an explicit rule and has the highest priority
      - allowed: everything
    - list(default=allow): block(*), allow(prj_1)
      - will allow only prj_1
      - allowed: (prj_1, all datasets)
    - list(default=allow): block(*, ds_*)
      - will block all datasets named ds_*
      - allowed: (project_3, dataset_1), (project_4, all datasets)
    - list(default=allow): allow(prj_2, ds_3), block(*, ds_*)
      - will block all datasets named ds_* except for ds_3 in prj_2 that is explicitly allowed.
        Please note order is not relevant and the result would be exactly the same for:
        block(*, ds_*), allow(prj_2, ds_3)
      - allowed: (prj_2, ds_3), (project_3, dataset_1), (project_4, all datasets)
    - list(default=block): allow(*, dataset_*):
      - only datasets named dataset_* will be allowed
      - allowed: (project_3, dataset_1), (project_4, dataset_4)
    """

    # the allow/block list holding the filters and the configured default effect
    metadata_filters: MetadataAllowBlockList = field(default_factory=MetadataAllowBlockList)

    @property
    def is_metadata_filtered(self) -> bool:
        """True when at least one filter is configured."""
        return bool(self.metadata_filters.filters)

    @property
    def is_metadata_blocked(self) -> bool:
        """
        Helper method for detecting an edge case where everything is blocked because the default
        effect is block and all filters are also blocking, in this case it doesn't make sense to
        run queries or filter data.
        """
        return self.metadata_filters.default_effect == FilterEffectType.BLOCK and all(
            f.effect == FilterEffectType.BLOCK for f in self.metadata_filters.filters
        )

    def is_project_with_datasets_filtered(self, project: str) -> bool:
        """
        Returns True if there's at least one filter configured for the specified project filtering
        on datasets, this can be used to check if get_sql_conditions for the project is going to
        return an empty query or not.
        """
        return self.is_metadata_filtered and any(
            f.matches(project=project) and f.dataset is not None
            for f in self.metadata_filters.filters
        )

    def is_whole_project_blocked(self, project: str) -> bool:
        """
        Helper method to be used when projects are iterated first, returns True if the project is
        fully blocked (blocked by a filter with no dataset or blocked by default) False otherwise.
        For example for a list like: list(default=allow): block(prj_1, ds_1) this method will return
        False as prj_1 is not fully blocked, there might be a (prj_1, ds_2) that is allowed, so
        prj_1 still needs to be iterated.
        """
        # the condition parameter below is to include blocking filters only if applied to the
        # whole project, we don't want to exclude a project just because a dataset is excluded
        effect = self._get_effect(
            metadata_filters=self.metadata_filters,
            force_regexp=False,
            condition=lambda f: not f.dataset or f.effect == FilterEffectType.ALLOW,
            project=project,
        )
        return effect == FilterEffectType.BLOCK

    def is_metadata_element_allowed(self, **kwargs: Any) -> bool:
        """
        Metadata elements filtering, iterates all filters looking for a match, if there's
        a match in a filter with the default effect, it's considered an explicit filter
        and search stops with the result effect being the default one.
        If there's a match in a filter not configured with the default effect, search continues.
        When the search completes, if there was a match then the result effect will be the one in
        the matched filter (must be the non-default effect).
        If there was no match the result effect will be the default one.
        Result for this method is True only if the result effect is ALLOW.

        Data is matched using properties specified in kwargs, the following keys are supported
        in kwargs: 'project', 'dataset', 'table', 'table_type'.
        """
        effect = self._get_effect(
            metadata_filters=self.metadata_filters, force_regexp=False, **kwargs
        )
        return effect == FilterEffectType.ALLOW

    @staticmethod
    def _get_effect(
        metadata_filters: MetadataAllowBlockList,
        force_regexp: bool,
        condition: Optional[Callable[[MetadataFilter], bool]] = None,
        **kwargs: Any,
    ) -> FilterEffectType:
        """
        Returns the effect for a metadata element with the properties specified by kwargs
        (project, dataset, table, table_type).
        If there's an explicit filter matching (a filter is explicit if the effect is the default
        one) then default effect is returned.
        If there's a match in the "other effect" list then the "other effect" is returned, we're
        calling "other effect" to the effect that is not the default one.
        If no matching filter, the default effect is returned.
        """
        # an empty filter list resolves to the default effect, same as an explicit match
        if not metadata_filters.filters or any(
            f.matches(force_regexp, **kwargs)
            for f in metadata_filters.get_default_effect_rules(condition=condition)
        ):
            return metadata_filters.default_effect

        if any(
            f.matches(force_regexp, **kwargs)
            for f in metadata_filters.get_other_effect_rules(condition=condition)
        ):
            return metadata_filters.other_effect

        return metadata_filters.default_effect

    def is_dataset_allowed(self, project: Optional[str], dataset: str) -> bool:
        """
        Helper method intended to be used when projects and datasets are iterated in memory.
        It returns True if the dataset in the given project is allowed (not blocked), this is
        equivalent to call is_metadata_element_allowed(project=project, dataset=dataset)
        """
        return self.is_metadata_element_allowed(project=project, dataset=dataset)

    def get_sql_conditions(
        self,
        column_mapping: Dict,  # maps project and dataset to the column name to use
        encoder: Callable[[str, str, FilterType], str],
        project: Optional[str] = None,
        force_lowercase: Optional[bool] = True,
    ) -> Optional[str]:
        """
        Helper method that returns a SQL query fragment with conditions for the current filters.
        If project is specified this will return conditions only for the specified project and this
        is supposed to be called after checking that is_project_with_datasets_filtered and
        is_project_allowed returned True.
        column_mapping is used to map filter fields (like project and dataset) to the actual
        database columns (like database and schema).
        encoder is used to encode a filter in the SQL dialect, it needs to encode to expressions
        like "database = 'db_1'" or "database REGEXP 'db_.*'".
        Examples:
        - default=block, filters=allow(project=x_*), block(project=x_1), allow(project=z)
          SQL: NOT(project='x_1') AND ((project REGEXP 'x_*') OR (project='z'))
        - default=allow, filters=block(project=x_*), allow(project=x_1), block(project=z)
          SQL: project='x_1' OR (NOT(project REGEXP 'x_*') AND NOT(project='z'))
        Basically we first put all filters matching the default condition joined by
        AND/OR (block/allow), and then all filters with the other effect joined by OR/AND.
        """
        if not self.metadata_filters.filters:
            return None
        if project and not self.is_project_with_datasets_filtered(project):
            return None

        # when a project is given, only filters matching that project are encoded
        def project_condition(f: MetadataFilter) -> bool:
            return not project or f.matches(project=project)

        default_effect = self.metadata_filters.default_effect
        default_effect_filters = self.metadata_filters.get_default_effect_rules(
            condition=project_condition
        )
        other_effect_filters = self.metadata_filters.get_other_effect_rules(
            condition=project_condition
        )
        # join operators are mirrored: explicit (default-effect) filters are OR'ed when the
        # default is allow and AND'ed when it is block; the other group uses the opposite
        default_effect_op = " OR " if default_effect == FilterEffectType.ALLOW else " AND "
        other_effect_op = " AND " if default_effect == FilterEffectType.ALLOW else " OR "

        default_effect_conditions = default_effect_op.join(
            [
                self._get_sql_field_condition(f, column_mapping, encoder, force_lowercase)
                for f in default_effect_filters
            ]
        )
        other_effect_conditions = other_effect_op.join(
            [
                self._get_sql_field_condition(f, column_mapping, encoder, force_lowercase)
                for f in other_effect_filters
            ]
        )

        if default_effect_conditions and other_effect_conditions:
            return f"(({default_effect_conditions}){default_effect_op}({other_effect_conditions}))"
        elif default_effect_conditions:
            return f"({default_effect_conditions})"
        elif other_effect_conditions:
            return f"({other_effect_conditions})"
        else:
            return None

    @staticmethod
    def _get_sql_field_condition(
        mf: MetadataFilter,
        column_mapping: Dict,
        encoder: Callable[[str, str, FilterType], str],
        force_lowercase: Optional[bool] = True,
    ) -> str:
        """Encodes one filter into a SQL condition, AND-ing its non-null fields."""
        # The comparison is performed case-insensitive (check MetadataFilter._safe_match)
        # We can use LOWER here since it is part of standard SQL (like AND/OR/NOT), so including it
        # here is a way to make sure that all comparisons are case-insensitive in the SQL sentences
        # for all engines. Added option to not always LOWER since customers do have lower/upper case
        # databases logged in MC
        # NOTE(review): the comprehension variable `field` shadows the `dataclasses.field`
        # imported at module level; harmless (comprehension scope) but worth renaming.
        conditions = " AND ".join(
            [
                encoder(
                    f"LOWER({column})" if force_lowercase else column,
                    getattr(mf, field).lower() if force_lowercase else getattr(mf, field),
                    # only the filter's designated target field uses the configured match
                    # type; every other field is compared with an exact match
                    mf.type if field == mf.filter_type_target_field() else FilterType.EXACT_MATCH,
                )
                for (field, column) in column_mapping.items()
                if getattr(mf, field) is not None
            ]
        )
        if not conditions:
            return ""
        return f"NOT({conditions})" if mf.effect == FilterEffectType.BLOCK else f"({conditions})"
@@ -0,0 +1,5 @@
1
+ from pycarlo.features.pii.constants import PiiFilteringFailModeType
2
+ from pycarlo.features.pii.pii_filterer import PiiFilterer
3
+ from pycarlo.features.pii.service import PiiService
4
+
5
+ __all__ = ["PiiFilteringFailModeType", "PiiService", "PiiFilterer"]
@@ -0,0 +1,3 @@
1
class PiiFilteringFailModeType:
    """String constants for the PII filtering fail mode.

    OPEN: errors while applying filters are logged and the original content is kept.
    CLOSE: errors while applying filters are raised (fail closed).
    """

    OPEN = "OPEN"
    CLOSE = "CLOSE"
@@ -0,0 +1,179 @@
1
+ import logging
2
+ import re
3
+ import time
4
+ from dataclasses import dataclass
5
+ from re import Pattern
6
+ from typing import Any, Callable, Dict, List, Optional, Union
7
+
8
+ from dataclasses_json import dataclass_json
9
+
10
+ _logger = logging.getLogger(__name__)
11
+
12
+
13
@dataclass_json
@dataclass
class PiiActiveFilter:
    """A single active PII filter: a named regular expression."""

    # filter name, used to build the replacement marker (see PiiFilterer)
    name: str
    # regular expression pattern matching the content to redact
    pattern: str
18
+
19
+
20
@dataclass_json
@dataclass
class PiiActiveFiltersConfig:
    """Configuration holding the list of active PII filters and the fail mode."""

    # the filters to apply
    active: List[PiiActiveFilter]
    # when True, errors while filtering are raised; when False they are logged
    # and the original content is returned (see PiiFilterer._do_filter)
    fail_closed: bool = True

    @staticmethod
    def is_valid_config(cfg: Optional[Dict]) -> bool:
        """Returns True when cfg is a non-empty dict with a non-empty 'active' list."""
        return bool(cfg and cfg.get("active", []))
29
+
30
+
31
@dataclass_json
@dataclass
class PiiFilterMetrics:
    """Metrics for one filter: replacement count and time spent."""

    # number of replacements performed
    replacements: int
    # time spent applying the filter, in milliseconds
    time_taken_ms: float
36
+
37
+
38
@dataclass
class PiiCompiledFilter:
    """A PII filter with its regular expression compiled, ready to be applied."""

    # filter name (from PiiActiveFilter.name)
    name: str
    # compiled regular expression used to find matches
    compiled_expression: Pattern
    # text inserted in place of every match, e.g. "<filtered:name>"
    replacement: str
43
+
44
+
45
class PiiFilterer:
    """
    Applies the configured PII filters (regular expressions) to strings, bytes and
    nested dict/list/tuple structures, replacing every match with a
    ``<filtered:name>`` marker. With a missing/invalid configuration the filterer
    is a no-op and returns the input unchanged.
    """

    # key used in the metrics dictionary for the aggregated totals
    _TOTAL_KEY = "_total"
    # template for the replacement text, formatted with the filter name
    _REPLACEMENT_STRING = "<filtered:{}>"

    def __init__(self, filters_config: Optional[Dict], include_metrics: bool = True):
        """
        :param filters_config: dictionary matching PiiActiveFiltersConfig (an "active"
            list of name/pattern entries). Filtering is disabled when the config is
            missing or has no active filters.
        :param include_metrics: when True (default), filter_message adds a
            "pii_metrics" entry to dictionary results.
        """
        self._config = (
            PiiActiveFiltersConfig.from_dict(filters_config)  # type: ignore
            if PiiActiveFiltersConfig.is_valid_config(filters_config)
            else None
        )

        # always initialize so _filter_text is safe to call even with no config
        # (previously the attribute was left undefined in that case)
        self._filters: List[PiiCompiledFilter] = []
        if self._config:
            self._filters = [
                PiiCompiledFilter(
                    name=f.name,
                    compiled_expression=re.compile(f.pattern),
                    replacement=self._REPLACEMENT_STRING.format(f.name),
                )
                for f in self._config.active
            ]
        self._include_metrics = include_metrics

    @staticmethod
    def _metrics_to_final(metrics: Dict, elapsed_time_ns: int) -> Dict:
        # converts the internal (count, time_ns) tuples into PiiFilterMetrics dicts
        # and adds the aggregated "_total" entry
        final_metrics = {
            k: PiiFilterMetrics(replacements=v, time_taken_ms=t / 1000000).to_dict()  # type: ignore
            for k, (v, t) in metrics.items()
        }
        total_replacements = sum(m["replacements"] for m in final_metrics.values())

        final_metrics[PiiFilterer._TOTAL_KEY] = PiiFilterMetrics(
            replacements=total_replacements, time_taken_ms=elapsed_time_ns / 1000000
        ).to_dict()  # type: ignore

        return final_metrics

    def filter_content(self, content: Union[bytes, str, Dict]) -> Union[bytes, str, Dict]:
        """
        Utility method to filter one of bytes, str or Dict. Calls internally one of:
        - filter_data
        - filter_str
        - filter_message
        """
        if isinstance(content, dict):
            return self.filter_message(content)
        elif isinstance(content, str):
            return self.filter_str(content)
        else:
            return self.filter_data(content)

    def filter_message(self, msg: Union[bytes, str, Dict]) -> Union[bytes, str, Dict]:
        """
        Filters a dictionary or a list. If the object is a dictionary it includes metrics in a
        pii_metrics attribute in the result dictionary (only if the filterer was created with
        include_metrics=True which is the default value).
        """
        if not self._config:
            return msg

        start_time = time.time_ns()

        # metrics can only be attached to dictionary results
        include_metrics = self._include_metrics and isinstance(msg, dict)
        metrics = {} if include_metrics else None

        result = self._do_filter(msg, lambda o: self._filter_object(o, metrics=metrics))

        if include_metrics:
            assert metrics is not None
            elapsed_time = time.time_ns() - start_time
            result["pii_metrics"] = self._metrics_to_final(metrics, elapsed_time_ns=elapsed_time)

        return result

    def _do_filter(self, data: Any, filter_function: Callable[[Any], Any]) -> Any:
        # runs filter_function honoring the fail_closed setting: exceptions are
        # raised when fail_closed is True, otherwise logged and the original
        # (unfiltered) data is returned
        if not self._config:
            return data
        try:
            result = filter_function(data)
        except Exception as exc:
            if self._config.fail_closed:
                raise
            _logger.exception(
                "Failed to evaluate PII filters: %s, ignoring because fail_closed=False", exc
            )
            result = data
        return result

    def filter_str(self, msg: str) -> str:
        """
        Filters a string, please note metrics are not included as we don't have a way to include
        metrics in the resulting string. We might return metrics in the future if we decide to
        use them where we call this (dbtv2 collection)
        """
        return self._do_filter(msg, lambda o: self._filter_text(o))

    def filter_data(self, msg: bytes, encoding: str = "utf8") -> bytes:
        """
        Filters a bytes array using the given encoding, please note metrics are not included as
        we don't have a way to include metrics in the resulting bytes array. We might return metrics
        in the future if we decide to use them where we call this (dbtv2 collection)
        """
        return self._do_filter(msg, lambda o: self._filter_bytes(o, encoding=encoding))

    def _filter_bytes(self, data: bytes, encoding: str) -> bytes:
        # decode -> filter as text -> encode back using the same encoding
        text = data.decode(encoding)
        filtered_text = self._filter_text(text)
        return filtered_text.encode(encoding)

    def _filter_object(self, o: Any, metrics: Optional[Dict] = None) -> Any:
        # recursively filters dicts (both keys and values), lists, tuples and
        # strings; any other type is returned unchanged
        if isinstance(o, dict):
            return {
                self._filter_text(k, metrics=metrics): self._filter_object(v, metrics=metrics)
                for k, v in o.items()
            }
        elif isinstance(o, list):
            return [self._filter_object(e, metrics=metrics) for e in o]
        elif isinstance(o, tuple):
            return tuple(self._filter_object(e, metrics=metrics) for e in o)
        elif isinstance(o, str):
            return self._filter_text(o, metrics=metrics)
        else:
            return o

    def _filter_text(self, text: str, metrics: Optional[Dict] = None) -> str:
        # applies every compiled filter in sequence, accumulating per-filter
        # (replacement_count, time_ns) tuples in `metrics` when provided
        for f in self._filters:
            filter_name = f.name
            start_time = time.time_ns()
            text, updated_count = f.compiled_expression.subn(f.replacement, text)
            elapsed_time = time.time_ns() - start_time

            if metrics is not None:
                prev_count, prev_time = metrics.get(filter_name, (0, 0))
                metrics[filter_name] = (prev_count + updated_count, prev_time + elapsed_time)

        return text
@@ -0,0 +1,20 @@
1
# Queries related to PII Filtering

# Returns the account-level PII filtering preferences: whether filtering is
# enabled and the configured fail mode.
GET_PII_PREFERENCES = """
query getPiiFilteringPreferences {
  getPiiFilteringPreferences {
    enabled,
    failMode
  }
}
"""

# Returns all PII filters defined for the account, including disabled ones
# (callers are expected to check the `enabled` flag).
GET_PII_FILTERS = """
query getPiiFilters {
  getPiiFilters {
    name,
    pattern,
    enabled
  }
}
"""
@@ -0,0 +1,56 @@
1
+ from typing import Dict, Optional, cast
2
+
3
+ from pycarlo.common.settings import (
4
+ HEADER_MCD_TELEMETRY_REASON,
5
+ HEADER_MCD_TELEMETRY_SERVICE,
6
+ RequestReason,
7
+ )
8
+ from pycarlo.core import Client, Query
9
+ from pycarlo.features.pii import PiiFilteringFailModeType
10
+ from pycarlo.features.pii.pii_filterer import PiiActiveFilter, PiiActiveFiltersConfig
11
+ from pycarlo.features.pii.queries import GET_PII_FILTERS, GET_PII_PREFERENCES
12
+
13
+
14
class PiiService:
    """Retrieves the account's PII filtering configuration from the Monte Carlo API."""

    def __init__(self, mc_client: Optional[Client] = None):
        """
        :param mc_client: optional Monte Carlo client, a new `Client` is created when omitted.
        """
        self._mc_client = mc_client or Client()

    @staticmethod
    def _telemetry_headers() -> Dict[str, str]:
        # both queries send the same telemetry headers; built here to avoid
        # duplicating the dictionary literal
        return {
            HEADER_MCD_TELEMETRY_REASON: RequestReason.SERVICE.value,
            HEADER_MCD_TELEMETRY_SERVICE: "pii_service",
        }

    def get_pii_filters_config(self) -> Optional[Dict]:
        """
        Returns the active PII filters configuration as a dictionary (the shape expected
        by `PiiFilterer`), or None when PII filtering is disabled for the account or no
        filters are defined.
        """
        prefs = cast(
            Query,
            self._mc_client(
                query=GET_PII_PREFERENCES,
                additional_headers=self._telemetry_headers(),
            ),
        ).get_pii_filtering_preferences
        if not prefs.enabled:
            return None

        # fail_mode comes back as a string; compare case-insensitively against CLOSE
        fail_closed = prefs.fail_mode.upper() == PiiFilteringFailModeType.CLOSE
        pii_filters = cast(
            Query,
            self._mc_client(
                query=GET_PII_FILTERS,
                additional_headers=self._telemetry_headers(),
            ),
        ).get_pii_filters
        if not pii_filters:
            return None

        return PiiActiveFiltersConfig(
            fail_closed=fail_closed,
            active=[
                PiiActiveFilter(
                    name=cast(str, f.name),
                    pattern=cast(str, f.pattern),
                )
                for f in pii_filters
                if f.enabled  # disabled filters are not applied
            ],
        ).to_dict()  # type: ignore
@@ -0,0 +1,4 @@
1
+ from pycarlo.features.user.models import Resource
2
+ from pycarlo.features.user.service import UserService
3
+
4
+ __all__ = ["Resource", "UserService"]
@@ -0,0 +1,10 @@
1
class UserServiceException(Exception):
    """Base class for all exceptions raised by UserService."""

    pass
3
+
4
+
5
class ResourceNotFoundException(UserServiceException):
    """Raised when no resource matches the requested resource id."""

    pass
7
+
8
+
9
class MultipleResourcesFoundException(UserServiceException):
    """Raised when multiple resources exist and no resource id was provided."""

    pass
@@ -0,0 +1,9 @@
1
+ from dataclasses import dataclass
2
+ from uuid import UUID
3
+
4
+
5
@dataclass
class Resource:
    """A resource (e.g. a warehouse or lake) monitored by Monte Carlo."""

    # unique identifier of the resource (the warehouse uuid)
    id: UUID
    # display name of the resource
    name: str
    # connection type of the resource — presumably values like warehouse connection
    # types returned by the API; confirm against the GraphQL schema
    type: str
@@ -0,0 +1,13 @@
1
# Returns all warehouses (resources) in the current user's account.
GET_USER_WAREHOUSES = """
query getUserWarehouses {
  getUser {
    account {
      warehouses {
        uuid
        name
        connectionType
      }
    }
  }
}
"""
@@ -0,0 +1,71 @@
1
+ from typing import Optional, Union, cast
2
+ from uuid import UUID
3
+
4
+ from pycarlo.common.settings import (
5
+ HEADER_MCD_TELEMETRY_REASON,
6
+ HEADER_MCD_TELEMETRY_SERVICE,
7
+ RequestReason,
8
+ )
9
+ from pycarlo.core import Client, Query
10
+ from pycarlo.features.user.exceptions import (
11
+ MultipleResourcesFoundException,
12
+ ResourceNotFoundException,
13
+ )
14
+ from pycarlo.features.user.models import Resource
15
+ from pycarlo.features.user.queries import GET_USER_WAREHOUSES
16
+
17
+
18
class UserService:
    """Account-level helpers, like resolving the account's resources (warehouses/lakes)."""

    def __init__(self, mc_client: Optional[Client] = None):
        """
        :param mc_client: optional Monte Carlo client, a new `Client` is created when omitted.
        """
        self._mc_client = mc_client or Client()

    @staticmethod
    def _to_resource(warehouse) -> Resource:
        # maps one warehouse record from the GraphQL response into a Resource
        return Resource(
            id=UUID(warehouse.uuid),  # type: ignore
            name=warehouse.name,  # type: ignore[reportArgumentType]
            type=warehouse.connection_type,  # type: ignore[reportArgumentType]
        )

    def get_resource(self, resource_id: Optional[Union[str, UUID]] = None) -> Resource:
        """
        Get a resource (e.g. lake or warehouse).

        :param resource_id: resource identifier. If not provided, and your account only has one
                            resource, it will be returned. If your account has multiple resources
                            an exception will be raised indicating a resource id must be provided.

        :return: resource (e.g. lake or warehouse monitored by Monte Carlo)
        :raise MultipleResourcesFoundException: multiple resources
            exist (a `resource_id` must be provided)
        :raise ResourceNotFoundException: a resource could not be found
        """
        response = cast(
            Query,
            self._mc_client(
                query=GET_USER_WAREHOUSES,
                additional_headers={
                    HEADER_MCD_TELEMETRY_REASON: RequestReason.SERVICE.value,
                    HEADER_MCD_TELEMETRY_SERVICE: "user_service",
                },
            ),
        )
        warehouses = response.get_user.account.warehouses

        # if resource id was provided, look for the matching warehouse record
        if resource_id:
            for w in warehouses:
                if w.uuid == str(resource_id):
                    return self._to_resource(w)
            # no warehouse matched the requested id
            raise ResourceNotFoundException(f"Resource not found with id={resource_id}")

        # no id given: unambiguous only when the account has exactly one warehouse
        if len(warehouses) == 1:
            return self._to_resource(warehouses[0])
        raise MultipleResourcesFoundException(
            "Multiple resources found, please specify a resource id"
        )
pycarlo/lib/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # Monte Carlo GraphQL Schema Library
2
+
3
+ The `schema.json` and `schema.py` files are auto-generated. **Do not edit them directly**!
4
+
5
+ If you need to customize the schema, see below. Refer to the
6
+ [CONTRIBUTING.md](../../CONTRIBUTING.md) for general development guidelines.
7
+
8
+ ## Schema Customizations
9
+
10
+ The generated `schema.py` is automatically modified during the build process to apply the following
11
+ customizations. This is done via `sed` commands in the [Makefile](../../Makefile), but if we need to
12
+ get fancier, we can just update the `customize-schema` target there to call whatever we need to do.
13
+
14
+ ### Connection Type Fix
15
+
16
+ The `Connection` class is changed from `sgqlc.types.relay.Connection` to `sgqlc.types.Type`.
17
+
18
+ **Why:** sgqlc automatically makes all types ending in "Connection" inherit from `relay.Connection`,
19
+ which makes `Connection` not a valid field type. This causes requests to fail when attempting to
20
+ resolve it. Changing it to inherit from `sgqlc.types.Type` fixes this issue.
21
+
22
+ [Related PR](https://github.com/monte-carlo-data/python-sdk/pull/63)
23
+
24
+ ### Backward-Compatible Enums
25
+
26
+ All GraphQL enum types use `pycarlo.lib.types.Enum` instead of `sgqlc.types.Enum`. This custom enum
27
+ class gracefully handles unknown enum values by returning them as strings instead of raising errors.
28
+
29
+ **Why:** When new enum values are added to the Monte Carlo API, older SDK versions would crash when
30
+ deserializing responses containing these new values. Our custom Enum prevents this by:
31
+
32
+ - Returning unknown values as plain strings (same type as known values)
33
+ - Logging a warning when unknown values are encountered
34
+
35
+ See [pycarlo/lib/types.py](types.py) for implementation details.
File without changes