acryl-datahub 1.2.0.11rc1__py3-none-any.whl → 1.2.0.11rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub may contain issues. See the advisory details accompanying this diff for more information.
- {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/METADATA +2557 -2557
- {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/RECORD +39 -37
- datahub/_version.py +1 -1
- datahub/cli/docker_cli.py +1 -1
- datahub/configuration/common.py +11 -0
- datahub/configuration/kafka.py +19 -1
- datahub/configuration/validate_field_removal.py +3 -0
- datahub/ingestion/autogenerated/capability_summary.json +2 -2
- datahub/ingestion/graph/client.py +7 -7
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/source/aws/s3_boto_utils.py +4 -1
- datahub/ingestion/source/data_lake_common/path_spec.py +39 -2
- datahub/ingestion/source/looker/looker_common.py +6 -0
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +30 -2
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +42 -29
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/s3/source.py +125 -164
- datahub/ingestion/source/snaplogic/snaplogic.py +4 -4
- datahub/ingestion/source/snaplogic/snaplogic_config.py +4 -4
- datahub/ingestion/source/snowflake/snowflake_utils.py +9 -9
- datahub/metadata/_internal_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +1 -1
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +1 -1
- datahub/sdk/search_filters.py +122 -1
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/sql_parsing/sqlglot_lineage.py +6 -1
- {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/top_level.txt +0 -0
datahub/sdk/search_filters.py
CHANGED
|
@@ -30,7 +30,14 @@ from datahub.ingestion.graph.filters import (
|
|
|
30
30
|
_get_status_filter,
|
|
31
31
|
)
|
|
32
32
|
from datahub.metadata.schema_classes import EntityTypeName
|
|
33
|
-
from datahub.metadata.urns import
|
|
33
|
+
from datahub.metadata.urns import (
|
|
34
|
+
ContainerUrn,
|
|
35
|
+
CorpGroupUrn,
|
|
36
|
+
CorpUserUrn,
|
|
37
|
+
DataPlatformUrn,
|
|
38
|
+
DomainUrn,
|
|
39
|
+
)
|
|
40
|
+
from datahub.utilities.urns.urn import guess_entity_type
|
|
34
41
|
|
|
35
42
|
_AndSearchFilterRule = TypedDict(
|
|
36
43
|
"_AndSearchFilterRule", {"and": List[SearchFilterRule]}
|
|
@@ -235,6 +242,94 @@ class _EnvFilter(_BaseFilter):
|
|
|
235
242
|
]
|
|
236
243
|
|
|
237
244
|
|
|
245
|
+
class _OwnerFilter(_BaseFilter):
    """Filter for entities owned by specific users or groups."""

    owner: List[str] = pydantic.Field(
        description="The owner to filter on. Should be user or group URNs.",
    )

    @pydantic.validator("owner", each_item=True)
    def validate_owner(cls, v: str) -> str:
        # Accept only corpuser / corpGroup URNs, normalizing each one through
        # its URN class so equivalent spellings compare equal downstream.
        if v.startswith("urn:li:"):
            entity_type = guess_entity_type(v)
            if entity_type == CorpUserUrn.ENTITY_TYPE:
                return str(CorpUserUrn.from_string(v))
            if entity_type == CorpGroupUrn.ENTITY_TYPE:
                return str(CorpGroupUrn.from_string(v))
        raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")

    def _build_rule(self) -> SearchFilterRule:
        # One EQUAL rule over the "owners" search field.
        return SearchFilterRule(
            field="owners",
            condition="EQUAL",
            values=self.owner,
        )

    def compile(self) -> _OrFilters:
        rule = self._build_rule()
        return [{"and": [rule]}]
|
275
|
+
class _GlossaryTermFilter(_BaseFilter):
    """Filter for entities associated with specific glossary terms."""

    glossary_term: List[str] = pydantic.Field(
        description="The glossary term to filter on. Should be glossary term URNs.",
    )

    @pydantic.validator("glossary_term", each_item=True)
    def validate_glossary_term(cls, v: str) -> str:
        if not v.startswith("urn:li:"):
            raise ValueError(f"Glossary term must be a valid URN, got: {v}")
        # Reject URNs of any other entity type.
        if guess_entity_type(v) != "glossaryTerm":
            raise ValueError(
                f"Glossary term must be a valid glossary term URN, got: {v}"
            )
        return v

    def _build_rule(self) -> SearchFilterRule:
        # One EQUAL rule over the "glossaryTerms" search field.
        return SearchFilterRule(
            field="glossaryTerms",
            condition="EQUAL",
            values=self.glossary_term,
        )

    def compile(self) -> _OrFilters:
        rule = self._build_rule()
        return [{"and": [rule]}]
|
|
305
|
+
class _TagFilter(_BaseFilter):
    """Filter for entities associated with specific tags."""

    tag: List[str] = pydantic.Field(
        description="The tag to filter on. Should be tag URNs.",
    )

    @pydantic.validator("tag", each_item=True)
    def validate_tag(cls, v: str) -> str:
        if not v.startswith("urn:li:"):
            raise ValueError(f"Tag must be a valid URN, got: {v}")
        # Reject URNs of any other entity type.
        if guess_entity_type(v) != "tag":
            raise ValueError(f"Tag must be a valid tag URN, got: {v}")
        return v

    def _build_rule(self) -> SearchFilterRule:
        # One EQUAL rule over the "tags" search field.
        return SearchFilterRule(
            field="tags",
            condition="EQUAL",
            values=self.tag,
        )

    def compile(self) -> _OrFilters:
        rule = self._build_rule()
        return [{"and": [rule]}]
|
+
|
|
332
|
+
|
|
238
333
|
class _CustomCondition(_BaseFilter):
|
|
239
334
|
"""Represents a single field condition."""
|
|
240
335
|
|
|
@@ -407,6 +502,9 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
|
|
|
407
502
|
_DomainFilter,
|
|
408
503
|
_ContainerFilter,
|
|
409
504
|
_EnvFilter,
|
|
505
|
+
_OwnerFilter,
|
|
506
|
+
_GlossaryTermFilter,
|
|
507
|
+
_TagFilter,
|
|
410
508
|
_CustomCondition,
|
|
411
509
|
]
|
|
412
510
|
|
|
@@ -448,6 +546,11 @@ else:
|
|
|
448
546
|
_ContainerFilter, Tag(_ContainerFilter._field_discriminator())
|
|
449
547
|
],
|
|
450
548
|
Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
|
|
549
|
+
Annotated[_OwnerFilter, Tag(_OwnerFilter._field_discriminator())],
|
|
550
|
+
Annotated[
|
|
551
|
+
_GlossaryTermFilter, Tag(_GlossaryTermFilter._field_discriminator())
|
|
552
|
+
],
|
|
553
|
+
Annotated[_TagFilter, Tag(_TagFilter._field_discriminator())],
|
|
451
554
|
Annotated[
|
|
452
555
|
_CustomCondition, Tag(_CustomCondition._field_discriminator())
|
|
453
556
|
],
|
|
@@ -551,6 +654,24 @@ class FilterDsl:
|
|
|
551
654
|
def env(env: Union[str, Sequence[str]], /) -> _EnvFilter:
|
|
552
655
|
return _EnvFilter(env=[env] if isinstance(env, str) else env)
|
|
553
656
|
|
|
657
|
+
@staticmethod
|
|
658
|
+
def owner(owner: Union[str, Sequence[str]], /) -> _OwnerFilter:
|
|
659
|
+
return _OwnerFilter(owner=[owner] if isinstance(owner, str) else owner)
|
|
660
|
+
|
|
661
|
+
@staticmethod
|
|
662
|
+
def glossary_term(
|
|
663
|
+
glossary_term: Union[str, Sequence[str]], /
|
|
664
|
+
) -> _GlossaryTermFilter:
|
|
665
|
+
return _GlossaryTermFilter(
|
|
666
|
+
glossary_term=[glossary_term]
|
|
667
|
+
if isinstance(glossary_term, str)
|
|
668
|
+
else glossary_term
|
|
669
|
+
)
|
|
670
|
+
|
|
671
|
+
@staticmethod
|
|
672
|
+
def tag(tag: Union[str, Sequence[str]], /) -> _TagFilter:
|
|
673
|
+
return _TagFilter(tag=[tag] if isinstance(tag, str) else tag)
|
|
674
|
+
|
|
554
675
|
@staticmethod
|
|
555
676
|
def has_custom_property(key: str, value: str) -> _CustomCondition:
|
|
556
677
|
return _CustomCondition(
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Dict, List, Union
|
|
3
|
+
|
|
4
|
+
from datahub.secret.secret_store import SecretStore
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Simple SecretStore implementation that fetches Secret values from the local environment.
class EnvironmentSecretStore(SecretStore):
    def __init__(self, config):
        # Stateless: the process environment itself is the backing store, so
        # the config is accepted but unused.
        pass

    def close(self) -> None:
        return

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        # Names with no matching environment variable map to None
        # (the os.getenv default).
        return {name: os.getenv(name) for name in secret_names}

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        return os.getenv(secret_name)

    def get_id(self) -> str:
        return "env"

    @classmethod
    def create(cls, config: Dict) -> "EnvironmentSecretStore":
        # Alternate constructor; config is passed through unchanged.
        return cls(config)
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Any, Dict, List, Union
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from datahub.secret.secret_store import SecretStore
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FileSecretStoreConfig(BaseModel):
    # Directory containing one file per secret, named after the secret.
    basedir: str = "/mnt/secrets"
    # Maximum number of characters returned for a secret; longer files are
    # truncated with a warning. NOTE(review): 1024768 looks like a typo for
    # 1048576 (1 MiB) — confirm the intended default.
    max_length: int = 1024768
+
|
|
16
|
+
|
|
17
|
+
# Simple SecretStore implementation that fetches Secret values from the local files.
class FileSecretStore(SecretStore):
    def __init__(self, config: FileSecretStoreConfig):
        self.config = config

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        # Delegate each lookup; missing secrets map to None.
        return {name: self.get_secret_value(name) for name in secret_names}

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        path = os.path.join(self.config.basedir, secret_name)
        if not os.path.exists(path):
            return None
        limit = self.config.max_length
        # Read one character beyond the limit so truncation can be detected.
        with open(path, "r") as f:
            raw = f.read(limit + 1)
        if len(raw) > limit:
            logger.warning(
                f"Secret {secret_name} is longer than {self.config.max_length} and will be truncated."
            )
        return raw[:limit].rstrip()

    def get_id(self) -> str:
        return "file"

    def close(self) -> None:
        return

    @classmethod
    def create(cls, config: Any) -> "FileSecretStore":
        # Alternate constructor: accepts a raw dict-like config and validates
        # it into a FileSecretStoreConfig.
        parsed = FileSecretStoreConfig.parse_obj(config)
        return cls(parsed)
|
@@ -70,3 +70,30 @@ class HasStructuredPropertiesPatch(MetadataPatchProposal):
|
|
|
70
70
|
),
|
|
71
71
|
)
|
|
72
72
|
return self
|
|
73
|
+
|
|
74
|
+
    def set_structured_property_manual(
        self, property: StructuredPropertyValueAssignmentClass
    ) -> Self:
        """Add or update a structured property, using a StructuredPropertyValueAssignmentClass object.

        Acts as an upsert: any existing assignment for the same property URN
        is removed before the new value is patched in. Returns self for
        fluent chaining.
        """
        # Clear any prior value at this path first, then emit the "add" patch
        # — presumably an "add" alone would not replace an existing value;
        # TODO confirm against the server's patch semantics.
        self.remove_structured_property(property.propertyUrn)
        self._add_patch(
            StructuredPropertiesClass.ASPECT_NAME,
            "add",
            path=("properties", property.propertyUrn),
            value=property,
        )
        return self
|
87
|
+
|
|
88
|
+
    def add_structured_property_manual(
        self, property: StructuredPropertyValueAssignmentClass
    ) -> Self:
        """Add a structured property, using a StructuredPropertyValueAssignmentClass object.

        Emits only the "add" patch; any pre-existing assignment for the same
        property URN is not removed first. Returns self for fluent chaining.
        """
        self._add_patch(
            StructuredPropertiesClass.ASPECT_NAME,
            "add",
            path=("properties", property.propertyUrn),
            value=property,
        )
        return self
|
@@ -1176,7 +1176,12 @@ def _try_extract_select(
|
|
|
1176
1176
|
statement = sqlglot.exp.Select().select("*").from_(statement)
|
|
1177
1177
|
elif isinstance(statement, sqlglot.exp.Insert):
|
|
1178
1178
|
# TODO Need to map column renames in the expressions part of the statement.
|
|
1179
|
-
|
|
1179
|
+
# Preserve CTEs when extracting the SELECT expression from INSERT
|
|
1180
|
+
original_ctes = statement.ctes
|
|
1181
|
+
statement = statement.expression # Get the SELECT expression from the INSERT
|
|
1182
|
+
if isinstance(statement, sqlglot.exp.Query) and original_ctes:
|
|
1183
|
+
for cte in original_ctes:
|
|
1184
|
+
statement = statement.with_(alias=cte.alias, as_=cte.this)
|
|
1180
1185
|
elif isinstance(statement, sqlglot.exp.Update):
|
|
1181
1186
|
# Assumption: the output table is already captured in the modified tables list.
|
|
1182
1187
|
statement = _extract_select_from_update(statement)
|
|
File without changes
|
{acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|