acryl-datahub 1.2.0.11rc1__py3-none-any.whl → 1.2.0.11rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)
  1. {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/METADATA +2557 -2557
  2. {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/RECORD +39 -37
  3. datahub/_version.py +1 -1
  4. datahub/cli/docker_cli.py +1 -1
  5. datahub/configuration/common.py +11 -0
  6. datahub/configuration/kafka.py +19 -1
  7. datahub/configuration/validate_field_removal.py +3 -0
  8. datahub/ingestion/autogenerated/capability_summary.json +2 -2
  9. datahub/ingestion/graph/client.py +7 -7
  10. datahub/ingestion/graph/filters.py +30 -11
  11. datahub/ingestion/source/aws/s3_boto_utils.py +4 -1
  12. datahub/ingestion/source/data_lake_common/path_spec.py +39 -2
  13. datahub/ingestion/source/looker/looker_common.py +6 -0
  14. datahub/ingestion/source/looker/looker_constant.py +4 -0
  15. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  16. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  17. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  18. datahub/ingestion/source/looker/lookml_config.py +30 -2
  19. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  20. datahub/ingestion/source/looker/lookml_source.py +42 -29
  21. datahub/ingestion/source/looker/view_upstream.py +494 -1
  22. datahub/ingestion/source/s3/source.py +125 -164
  23. datahub/ingestion/source/snaplogic/snaplogic.py +4 -4
  24. datahub/ingestion/source/snaplogic/snaplogic_config.py +4 -4
  25. datahub/ingestion/source/snowflake/snowflake_utils.py +9 -9
  26. datahub/metadata/_internal_schema_classes.py +1 -1
  27. datahub/metadata/schema.avsc +1 -1
  28. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  29. datahub/metadata/schemas/MetadataChangeEvent.avsc +1 -1
  30. datahub/sdk/search_filters.py +122 -1
  31. datahub/secret/datahub_secret_store.py +3 -0
  32. datahub/secret/environment_secret_store.py +29 -0
  33. datahub/secret/file_secret_store.py +49 -0
  34. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  35. datahub/sql_parsing/sqlglot_lineage.py +6 -1
  36. {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/WHEEL +0 -0
  37. {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/entry_points.txt +0 -0
  38. {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/licenses/LICENSE +0 -0
  39. {acryl_datahub-1.2.0.11rc1.dist-info → acryl_datahub-1.2.0.11rc3.dist-info}/top_level.txt +0 -0
--- a/datahub/sdk/search_filters.py
+++ b/datahub/sdk/search_filters.py
@@ -30,7 +30,14 @@ from datahub.ingestion.graph.filters import (
     _get_status_filter,
 )
 from datahub.metadata.schema_classes import EntityTypeName
-from datahub.metadata.urns import ContainerUrn, DataPlatformUrn, DomainUrn
+from datahub.metadata.urns import (
+    ContainerUrn,
+    CorpGroupUrn,
+    CorpUserUrn,
+    DataPlatformUrn,
+    DomainUrn,
+)
+from datahub.utilities.urns.urn import guess_entity_type
 
 _AndSearchFilterRule = TypedDict(
     "_AndSearchFilterRule", {"and": List[SearchFilterRule]}
@@ -235,6 +242,94 @@ class _EnvFilter(_BaseFilter):
         ]
 
 
+class _OwnerFilter(_BaseFilter):
+    """Filter for entities owned by specific users or groups."""
+
+    owner: List[str] = pydantic.Field(
+        description="The owner to filter on. Should be user or group URNs.",
+    )
+
+    @pydantic.validator("owner", each_item=True)
+    def validate_owner(cls, v: str) -> str:
+        if not v.startswith("urn:li:"):
+            raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")
+        _type = guess_entity_type(v)
+        if _type == CorpUserUrn.ENTITY_TYPE:
+            return str(CorpUserUrn.from_string(v))
+        elif _type == CorpGroupUrn.ENTITY_TYPE:
+            return str(CorpGroupUrn.from_string(v))
+        else:
+            raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")
+
+    def _build_rule(self) -> SearchFilterRule:
+        return SearchFilterRule(
+            field="owners",
+            condition="EQUAL",
+            values=self.owner,
+        )
+
+    def compile(self) -> _OrFilters:
+        return [{"and": [self._build_rule()]}]
+
+
+class _GlossaryTermFilter(_BaseFilter):
+    """Filter for entities associated with specific glossary terms."""
+
+    glossary_term: List[str] = pydantic.Field(
+        description="The glossary term to filter on. Should be glossary term URNs.",
+    )
+
+    @pydantic.validator("glossary_term", each_item=True)
+    def validate_glossary_term(cls, v: str) -> str:
+        if not v.startswith("urn:li:"):
+            raise ValueError(f"Glossary term must be a valid URN, got: {v}")
+        # Validate that it's a glossary term URN
+        _type = guess_entity_type(v)
+        if _type != "glossaryTerm":
+            raise ValueError(
+                f"Glossary term must be a valid glossary term URN, got: {v}"
+            )
+        return v
+
+    def _build_rule(self) -> SearchFilterRule:
+        return SearchFilterRule(
+            field="glossaryTerms",
+            condition="EQUAL",
+            values=self.glossary_term,
+        )
+
+    def compile(self) -> _OrFilters:
+        return [{"and": [self._build_rule()]}]
+
+
+class _TagFilter(_BaseFilter):
+    """Filter for entities associated with specific tags."""
+
+    tag: List[str] = pydantic.Field(
+        description="The tag to filter on. Should be tag URNs.",
+    )
+
+    @pydantic.validator("tag", each_item=True)
+    def validate_tag(cls, v: str) -> str:
+        if not v.startswith("urn:li:"):
+            raise ValueError(f"Tag must be a valid URN, got: {v}")
+        # Validate that it's a tag URN
+        _type = guess_entity_type(v)
+        if _type != "tag":
+            raise ValueError(f"Tag must be a valid tag URN, got: {v}")
+        return v
+
+    def _build_rule(self) -> SearchFilterRule:
+        return SearchFilterRule(
+            field="tags",
+            condition="EQUAL",
+            values=self.tag,
+        )
+
+    def compile(self) -> _OrFilters:
+        return [{"and": [self._build_rule()]}]
+
+
 class _CustomCondition(_BaseFilter):
     """Represents a single field condition."""
 
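Note on the three new filter classes: each compiles to a single `field EQUAL values` rule wrapped in the SDK's OR-of-ANDs filter structure, and the owner validator normalizes URNs through the typed `CorpUserUrn`/`CorpGroupUrn` classes. A minimal sketch of the behavior, assuming direct use of the private class (in practice these are reached through `FilterDsl`, shown further below); the printed rule layout is approximate:

    from datahub.sdk.search_filters import _OwnerFilter  # private; for illustration only

    f = _OwnerFilter(owner=["urn:li:corpuser:jdoe", "urn:li:corpGroup:data-eng"])
    print(f.compile())
    # Roughly: [{"and": [SearchFilterRule(field="owners", condition="EQUAL",
    #           values=["urn:li:corpuser:jdoe", "urn:li:corpGroup:data-eng"])]}]

    # Non-user/group URNs are rejected by the validator (pydantic.ValidationError):
    _OwnerFilter(owner=["urn:li:tag:PII"])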
@@ -407,6 +502,9 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
         _DomainFilter,
         _ContainerFilter,
         _EnvFilter,
+        _OwnerFilter,
+        _GlossaryTermFilter,
+        _TagFilter,
         _CustomCondition,
     ]
 
@@ -448,6 +546,11 @@ else:
                 _ContainerFilter, Tag(_ContainerFilter._field_discriminator())
             ],
             Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
+            Annotated[_OwnerFilter, Tag(_OwnerFilter._field_discriminator())],
+            Annotated[
+                _GlossaryTermFilter, Tag(_GlossaryTermFilter._field_discriminator())
+            ],
+            Annotated[_TagFilter, Tag(_TagFilter._field_discriminator())],
             Annotated[
                 _CustomCondition, Tag(_CustomCondition._field_discriminator())
             ],
@@ -551,6 +654,24 @@ class FilterDsl:
     def env(env: Union[str, Sequence[str]], /) -> _EnvFilter:
         return _EnvFilter(env=[env] if isinstance(env, str) else env)
 
+    @staticmethod
+    def owner(owner: Union[str, Sequence[str]], /) -> _OwnerFilter:
+        return _OwnerFilter(owner=[owner] if isinstance(owner, str) else owner)
+
+    @staticmethod
+    def glossary_term(
+        glossary_term: Union[str, Sequence[str]], /
+    ) -> _GlossaryTermFilter:
+        return _GlossaryTermFilter(
+            glossary_term=[glossary_term]
+            if isinstance(glossary_term, str)
+            else glossary_term
+        )
+
+    @staticmethod
+    def tag(tag: Union[str, Sequence[str]], /) -> _TagFilter:
+        return _TagFilter(tag=[tag] if isinstance(tag, str) else tag)
+
     @staticmethod
     def has_custom_property(key: str, value: str) -> _CustomCondition:
         return _CustomCondition(
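These `FilterDsl` additions are the public entry points for the new filters. A hedged usage sketch; single strings are wrapped into lists by the `isinstance` checks above, and `F.and_` is assumed to be the existing combinator elsewhere in `search_filters.py` (unchanged by this diff):

    from datahub.sdk.search_filters import FilterDsl as F

    owned = F.owner("urn:li:corpuser:jdoe")
    tagged = F.tag(["urn:li:tag:PII", "urn:li:tag:Deprecated"])
    termed = F.glossary_term("urn:li:glossaryTerm:Classification.Sensitive")

    # Combine as with the existing filters (assumption: F.and_ already exists).
    combined = F.and_(owned, tagged)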
--- a/datahub/secret/datahub_secret_store.py
+++ b/datahub/secret/datahub_secret_store.py
@@ -65,3 +65,6 @@ class DataHubSecretStore(SecretStore):
     def create(cls, config: Any) -> "DataHubSecretStore":
         config = DataHubSecretStoreConfig.parse_obj(config)
         return cls(config)
+
+    def close(self) -> None:
+        self.client.graph.close()
--- /dev/null
+++ b/datahub/secret/environment_secret_store.py
@@ -0,0 +1,29 @@
+import os
+from typing import Dict, List, Union
+
+from datahub.secret.secret_store import SecretStore
+
+
+# Simple SecretStore implementation that fetches Secret values from the local environment.
+class EnvironmentSecretStore(SecretStore):
+    def __init__(self, config):
+        pass
+
+    def close(self) -> None:
+        return
+
+    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
+        values = {}
+        for secret_name in secret_names:
+            values[secret_name] = os.getenv(secret_name)
+        return values
+
+    def get_secret_value(self, secret_name: str) -> Union[str, None]:
+        return os.getenv(secret_name)
+
+    def get_id(self) -> str:
+        return "env"
+
+    @classmethod
+    def create(cls, config: Dict) -> "EnvironmentSecretStore":
+        return cls(config)
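The new environment-backed store resolves each requested name with `os.getenv`, so unset names come back as `None`. A small usage sketch (the secret name and value are illustrative):

    import os

    from datahub.secret.environment_secret_store import EnvironmentSecretStore

    os.environ["MY_DB_PASSWORD"] = "hunter2"  # illustrative only

    store = EnvironmentSecretStore.create({})
    store.get_secret_value("MY_DB_PASSWORD")          # "hunter2"
    store.get_secret_values(["MY_DB_PASSWORD", "X"])  # {"MY_DB_PASSWORD": "hunter2", "X": None}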
--- /dev/null
+++ b/datahub/secret/file_secret_store.py
@@ -0,0 +1,49 @@
+import logging
+import os
+from typing import Any, Dict, List, Union
+
+from pydantic import BaseModel
+
+from datahub.secret.secret_store import SecretStore
+
+logger = logging.getLogger(__name__)
+
+
+class FileSecretStoreConfig(BaseModel):
+    basedir: str = "/mnt/secrets"
+    max_length: int = 1024768
+
+
+# Simple SecretStore implementation that fetches Secret values from the local files.
+class FileSecretStore(SecretStore):
+    def __init__(self, config: FileSecretStoreConfig):
+        self.config = config
+
+    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
+        values = {}
+        for secret_name in secret_names:
+            values[secret_name] = self.get_secret_value(secret_name)
+        return values
+
+    def get_secret_value(self, secret_name: str) -> Union[str, None]:
+        secret_path = os.path.join(self.config.basedir, secret_name)
+        if os.path.exists(secret_path):
+            with open(secret_path, "r") as f:
+                secret_value = f.read(self.config.max_length + 1)
+                if len(secret_value) > self.config.max_length:
+                    logger.warning(
+                        f"Secret {secret_name} is longer than {self.config.max_length} and will be truncated."
+                    )
+                return secret_value[: self.config.max_length].rstrip()
+        return None
+
+    def get_id(self) -> str:
+        return "file"
+
+    def close(self) -> None:
+        return
+
+    @classmethod
+    def create(cls, config: Any) -> "FileSecretStore":
+        config = FileSecretStoreConfig.parse_obj(config)
+        return cls(config)
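The file-backed store reads one file per secret under `basedir`, truncating anything past `max_length` (with a warning) and stripping trailing whitespace, which suits Kubernetes-style mounted secret volumes. A usage sketch (the base directory and secret name are illustrative):

    from datahub.secret.file_secret_store import FileSecretStore

    # Assumes /mnt/secrets/MY_DB_PASSWORD exists, e.g. from a mounted secret volume.
    store = FileSecretStore.create({"basedir": "/mnt/secrets"})
    value = store.get_secret_value("MY_DB_PASSWORD")  # file contents, rstripped; None if absent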
--- a/datahub/specific/aspect_helpers/structured_properties.py
+++ b/datahub/specific/aspect_helpers/structured_properties.py
@@ -70,3 +70,30 @@ class HasStructuredPropertiesPatch(MetadataPatchProposal):
             ),
         )
         return self
+
+    def set_structured_property_manual(
+        self, property: StructuredPropertyValueAssignmentClass
+    ) -> Self:
+        """Add or update a structured property, using a StructuredPropertyValueAssignmentClass object."""
+
+        self.remove_structured_property(property.propertyUrn)
+        self._add_patch(
+            StructuredPropertiesClass.ASPECT_NAME,
+            "add",
+            path=("properties", property.propertyUrn),
+            value=property,
+        )
+        return self
+
+    def add_structured_property_manual(
+        self, property: StructuredPropertyValueAssignmentClass
+    ) -> Self:
+        """Add a structured property, using a StructuredPropertyValueAssignmentClass object."""
+
+        self._add_patch(
+            StructuredPropertiesClass.ASPECT_NAME,
+            "add",
+            path=("properties", property.propertyUrn),
+            value=property,
+        )
+        return self
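Of the two new helpers, `set_structured_property_manual` removes any existing assignment before adding (an upsert), while `add_structured_property_manual` patches without the removal. A hedged sketch, assuming `DatasetPatchBuilder` mixes in `HasStructuredPropertiesPatch`; the property URN and values are illustrative:

    from datahub.metadata.schema_classes import StructuredPropertyValueAssignmentClass
    from datahub.specific.dataset import DatasetPatchBuilder  # assumed to include this mixin

    assignment = StructuredPropertyValueAssignmentClass(
        propertyUrn="urn:li:structuredProperty:io.acryl.privacy.retentionTime",
        values=[90.0],
    )

    patch = DatasetPatchBuilder(
        "urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)"
    ).set_structured_property_manual(assignment)  # remove-then-add, i.e. upsert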
--- a/datahub/sql_parsing/sqlglot_lineage.py
+++ b/datahub/sql_parsing/sqlglot_lineage.py
@@ -1176,7 +1176,12 @@ def _try_extract_select(
         statement = sqlglot.exp.Select().select("*").from_(statement)
     elif isinstance(statement, sqlglot.exp.Insert):
         # TODO Need to map column renames in the expressions part of the statement.
-        statement = statement.expression
+        # Preserve CTEs when extracting the SELECT expression from INSERT
+        original_ctes = statement.ctes
+        statement = statement.expression  # Get the SELECT expression from the INSERT
+        if isinstance(statement, sqlglot.exp.Query) and original_ctes:
+            for cte in original_ctes:
+                statement = statement.with_(alias=cte.alias, as_=cte.this)
     elif isinstance(statement, sqlglot.exp.Update):
         # Assumption: the output table is already captured in the modified tables list.
         statement = _extract_select_from_update(statement)
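The effect of this change: for `WITH ... INSERT INTO ... SELECT ...`, taking `statement.expression` used to drop the `WITH` clause, so CTE references in the extracted SELECT could no longer be resolved during lineage analysis. A standalone illustration mirroring the calls in the diff (the SQL string is illustrative; exact behavior depends on the pinned sqlglot version):

    import sqlglot

    parsed = sqlglot.parse_one(
        "WITH recent AS (SELECT id FROM events) INSERT INTO target SELECT id FROM recent"
    )
    ctes = parsed.ctes          # the WITH clause hangs off the INSERT node
    select = parsed.expression  # bare SELECT, without the CTEs
    for cte in ctes:
        select = select.with_(alias=cte.alias, as_=cte.this)
    select.sql()
    # "WITH recent AS (SELECT id FROM events) SELECT id FROM recent"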