acryl-datahub 1.2.0.1__py3-none-any.whl → 1.2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/METADATA +2574 -2572
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/RECORD +54 -46
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +13 -1
- datahub/emitter/rest_emitter.py +3 -1
- datahub/ingestion/autogenerated/capability_summary.json +97 -6
- datahub/ingestion/source/abs/source.py +5 -29
- datahub/ingestion/source/aws/glue.py +8 -0
- datahub/ingestion/source/cassandra/cassandra.py +5 -7
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/datahub/datahub_source.py +3 -0
- datahub/ingestion/source/dbt/dbt_common.py +69 -2
- datahub/ingestion/source/delta_lake/source.py +1 -0
- datahub/ingestion/source/ge_data_profiler.py +9 -1
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +120 -0
- datahub/ingestion/source/grafana/report.py +91 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/hex.py +8 -0
- datahub/ingestion/source/looker/looker_common.py +40 -4
- datahub/ingestion/source/looker/looker_source.py +9 -0
- datahub/ingestion/source/looker/lookml_source.py +8 -0
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/redshift/redshift.py +8 -1
- datahub/ingestion/source/s3/source.py +14 -34
- datahub/ingestion/source/sql/athena.py +8 -2
- datahub/ingestion/source/sql/clickhouse.py +9 -0
- datahub/ingestion/source/sql/postgres.py +190 -1
- datahub/ingestion/source/sql_queries.py +111 -76
- datahub/ingestion/source/unity/proxy.py +8 -8
- datahub/metadata/_internal_schema_classes.py +96 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +2 -0
- datahub/metadata/schema.avsc +69 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +10 -1
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +42 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -0
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/sdk/dataset.py +44 -0
- datahub/sdk/search_filters.py +84 -15
- datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
- datahub/telemetry/telemetry.py +4 -1
- datahub/upgrade/upgrade.py +5 -3
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/top_level.txt +0 -0
datahub/sdk/dataset.py
CHANGED
|
@@ -72,6 +72,11 @@ UpstreamLineageInputType: TypeAlias = Union[
|
|
|
72
72
|
Dict[DatasetUrnOrStr, ColumnLineageMapping],
|
|
73
73
|
]
|
|
74
74
|
|
|
75
|
+
ViewDefinitionInputType: TypeAlias = Union[
|
|
76
|
+
str,
|
|
77
|
+
models.ViewPropertiesClass,
|
|
78
|
+
]
|
|
79
|
+
|
|
75
80
|
|
|
76
81
|
def _parse_upstream_input(
|
|
77
82
|
upstream_input: UpstreamInputType,
|
|
@@ -467,6 +472,7 @@ class Dataset(
|
|
|
467
472
|
custom_properties: Optional[Dict[str, str]] = None,
|
|
468
473
|
created: Optional[datetime] = None,
|
|
469
474
|
last_modified: Optional[datetime] = None,
|
|
475
|
+
view_definition: Optional[ViewDefinitionInputType] = None,
|
|
470
476
|
# Standard aspects.
|
|
471
477
|
parent_container: ParentContainerInputType | Unset = unset,
|
|
472
478
|
subtype: Optional[str] = None,
|
|
@@ -495,6 +501,7 @@ class Dataset(
|
|
|
495
501
|
custom_properties: Optional dictionary of custom properties.
|
|
496
502
|
created: Optional creation timestamp.
|
|
497
503
|
last_modified: Optional last modification timestamp.
|
|
504
|
+
view_definition: Optional view definition for the dataset.
|
|
498
505
|
parent_container: Optional parent container for this dataset.
|
|
499
506
|
subtype: Optional subtype of the dataset.
|
|
500
507
|
owners: Optional list of owners.
|
|
@@ -536,6 +543,8 @@ class Dataset(
|
|
|
536
543
|
self.set_created(created)
|
|
537
544
|
if last_modified is not None:
|
|
538
545
|
self.set_last_modified(last_modified)
|
|
546
|
+
if view_definition is not None:
|
|
547
|
+
self.set_view_definition(view_definition)
|
|
539
548
|
|
|
540
549
|
if parent_container is not unset:
|
|
541
550
|
self._set_container(parent_container)
|
|
@@ -717,6 +726,41 @@ class Dataset(
|
|
|
717
726
|
def set_last_modified(self, last_modified: datetime) -> None:
|
|
718
727
|
self._ensure_dataset_props().lastModified = make_time_stamp(last_modified)
|
|
719
728
|
|
|
729
|
+
@property
|
|
730
|
+
def view_definition(self) -> Optional[models.ViewPropertiesClass]:
|
|
731
|
+
"""Get the view definition of the dataset.
|
|
732
|
+
|
|
733
|
+
Under typical usage, this will be present if the subtype is "View".
|
|
734
|
+
|
|
735
|
+
Returns:
|
|
736
|
+
The view definition if set, None otherwise.
|
|
737
|
+
"""
|
|
738
|
+
return self._get_aspect(models.ViewPropertiesClass)
|
|
739
|
+
|
|
740
|
+
def set_view_definition(self, view_definition: ViewDefinitionInputType) -> None:
|
|
741
|
+
"""Set the view definition of the dataset.
|
|
742
|
+
|
|
743
|
+
If you're setting a view definition, subtype should typically be set to "view".
|
|
744
|
+
|
|
745
|
+
If a string is provided, it will be treated as a SQL view definition. To set
|
|
746
|
+
a custom language or other properties, provide a ViewPropertiesClass object.
|
|
747
|
+
|
|
748
|
+
Args:
|
|
749
|
+
view_definition: The view definition to set.
|
|
750
|
+
"""
|
|
751
|
+
if isinstance(view_definition, models.ViewPropertiesClass):
|
|
752
|
+
self._set_aspect(view_definition)
|
|
753
|
+
elif isinstance(view_definition, str):
|
|
754
|
+
self._set_aspect(
|
|
755
|
+
models.ViewPropertiesClass(
|
|
756
|
+
materialized=False,
|
|
757
|
+
viewLogic=view_definition,
|
|
758
|
+
viewLanguage="SQL",
|
|
759
|
+
)
|
|
760
|
+
)
|
|
761
|
+
else:
|
|
762
|
+
assert_never(view_definition)
|
|
763
|
+
|
|
720
764
|
def _schema_dict(self) -> Dict[str, models.SchemaFieldClass]:
|
|
721
765
|
schema_metadata = self._get_aspect(models.SchemaMetadataClass)
|
|
722
766
|
if schema_metadata is None:
|
datahub/sdk/search_filters.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
|
+
import json
|
|
4
5
|
from typing import (
|
|
5
6
|
TYPE_CHECKING,
|
|
6
7
|
Annotated,
|
|
@@ -29,7 +30,7 @@ from datahub.ingestion.graph.filters import (
|
|
|
29
30
|
_get_status_filter,
|
|
30
31
|
)
|
|
31
32
|
from datahub.metadata.schema_classes import EntityTypeName
|
|
32
|
-
from datahub.metadata.urns import DataPlatformUrn, DomainUrn
|
|
33
|
+
from datahub.metadata.urns import ContainerUrn, DataPlatformUrn, DomainUrn
|
|
33
34
|
|
|
34
35
|
_AndSearchFilterRule = TypedDict(
|
|
35
36
|
"_AndSearchFilterRule", {"and": List[SearchFilterRule]}
|
|
@@ -81,7 +82,7 @@ class _EntityTypeFilter(_BaseFilter):
|
|
|
81
82
|
ENTITY_TYPE_FIELD: ClassVar[str] = "_entityType"
|
|
82
83
|
|
|
83
84
|
entity_type: List[str] = pydantic.Field(
|
|
84
|
-
description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
|
|
85
|
+
description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', 'dataProduct', etc.",
|
|
85
86
|
)
|
|
86
87
|
|
|
87
88
|
def _build_rule(self) -> SearchFilterRule:
|
|
@@ -174,6 +175,39 @@ class _DomainFilter(_BaseFilter):
|
|
|
174
175
|
return [{"and": [self._build_rule()]}]
|
|
175
176
|
|
|
176
177
|
|
|
178
|
+
class _ContainerFilter(_BaseFilter):
|
|
179
|
+
container: List[str]
|
|
180
|
+
direct_descendants_only: bool = pydantic.Field(
|
|
181
|
+
default=False,
|
|
182
|
+
description="If true, only entities that are direct descendants of the container will be returned.",
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
@pydantic.validator("container", each_item=True)
|
|
186
|
+
def validate_container(cls, v: str) -> str:
|
|
187
|
+
return str(ContainerUrn.from_string(v))
|
|
188
|
+
|
|
189
|
+
@classmethod
|
|
190
|
+
def _field_discriminator(cls) -> str:
|
|
191
|
+
return "container"
|
|
192
|
+
|
|
193
|
+
def _build_rule(self) -> SearchFilterRule:
|
|
194
|
+
if self.direct_descendants_only:
|
|
195
|
+
return SearchFilterRule(
|
|
196
|
+
field="container",
|
|
197
|
+
condition="EQUAL",
|
|
198
|
+
values=self.container,
|
|
199
|
+
)
|
|
200
|
+
else:
|
|
201
|
+
return SearchFilterRule(
|
|
202
|
+
field="browsePathV2",
|
|
203
|
+
condition="CONTAIN",
|
|
204
|
+
values=self.container,
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
def compile(self) -> _OrFilters:
|
|
208
|
+
return [{"and": [self._build_rule()]}]
|
|
209
|
+
|
|
210
|
+
|
|
177
211
|
class _EnvFilter(_BaseFilter):
|
|
178
212
|
# Note that not all entity types have an env (e.g. dashboards / charts).
|
|
179
213
|
# If the env filter is specified, these will be excluded.
|
|
@@ -342,6 +376,8 @@ def _filter_discriminator(v: Any) -> Optional[str]:
|
|
|
342
376
|
keys = list(v.keys())
|
|
343
377
|
if len(keys) == 1:
|
|
344
378
|
return keys[0]
|
|
379
|
+
elif set(keys).issuperset({"container"}):
|
|
380
|
+
return _ContainerFilter._field_discriminator()
|
|
345
381
|
elif set(keys).issuperset({"field", "condition"}):
|
|
346
382
|
return _CustomCondition._field_discriminator()
|
|
347
383
|
|
|
@@ -360,6 +396,7 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
|
|
|
360
396
|
_StatusFilter,
|
|
361
397
|
_PlatformFilter,
|
|
362
398
|
_DomainFilter,
|
|
399
|
+
_ContainerFilter,
|
|
363
400
|
_EnvFilter,
|
|
364
401
|
_CustomCondition,
|
|
365
402
|
]
|
|
@@ -370,25 +407,45 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
|
|
|
370
407
|
else:
|
|
371
408
|
from pydantic import Discriminator, Tag
|
|
372
409
|
|
|
410
|
+
def _parse_json_from_string(value: Any) -> Any:
|
|
411
|
+
if isinstance(value, str):
|
|
412
|
+
try:
|
|
413
|
+
return json.loads(value)
|
|
414
|
+
except json.JSONDecodeError:
|
|
415
|
+
return value
|
|
416
|
+
else:
|
|
417
|
+
return value
|
|
418
|
+
|
|
373
419
|
# TODO: Once we're fully on pydantic 2, we can use a RootModel here.
|
|
374
420
|
# That way we'd be able to attach methods to the Filter type.
|
|
375
421
|
# e.g. replace load_filters(...) with Filter.load(...)
|
|
376
422
|
Filter = Annotated[
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
423
|
+
Annotated[
|
|
424
|
+
Union[
|
|
425
|
+
Annotated[_And, Tag(_And._field_discriminator())],
|
|
426
|
+
Annotated[_Or, Tag(_Or._field_discriminator())],
|
|
427
|
+
Annotated[_Not, Tag(_Not._field_discriminator())],
|
|
428
|
+
Annotated[
|
|
429
|
+
_EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())
|
|
430
|
+
],
|
|
431
|
+
Annotated[
|
|
432
|
+
_EntitySubtypeFilter,
|
|
433
|
+
Tag(_EntitySubtypeFilter._field_discriminator()),
|
|
434
|
+
],
|
|
435
|
+
Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
|
|
436
|
+
Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
|
|
437
|
+
Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
|
|
438
|
+
Annotated[
|
|
439
|
+
_ContainerFilter, Tag(_ContainerFilter._field_discriminator())
|
|
440
|
+
],
|
|
441
|
+
Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
|
|
442
|
+
Annotated[
|
|
443
|
+
_CustomCondition, Tag(_CustomCondition._field_discriminator())
|
|
444
|
+
],
|
|
384
445
|
],
|
|
385
|
-
|
|
386
|
-
Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
|
|
387
|
-
Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
|
|
388
|
-
Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
|
|
389
|
-
Annotated[_CustomCondition, Tag(_CustomCondition._field_discriminator())],
|
|
446
|
+
Discriminator(_filter_discriminator),
|
|
390
447
|
],
|
|
391
|
-
|
|
448
|
+
pydantic.BeforeValidator(_parse_json_from_string),
|
|
392
449
|
]
|
|
393
450
|
|
|
394
451
|
# Required to resolve forward references to "Filter"
|
|
@@ -468,6 +525,18 @@ class FilterDsl:
|
|
|
468
525
|
def domain(domain: Union[str, Sequence[str]], /) -> _DomainFilter:
|
|
469
526
|
return _DomainFilter(domain=[domain] if isinstance(domain, str) else domain)
|
|
470
527
|
|
|
528
|
+
@staticmethod
|
|
529
|
+
def container(
|
|
530
|
+
container: Union[str, Sequence[str]],
|
|
531
|
+
/,
|
|
532
|
+
*,
|
|
533
|
+
direct_descendants_only: bool = False,
|
|
534
|
+
) -> _ContainerFilter:
|
|
535
|
+
return _ContainerFilter(
|
|
536
|
+
container=[container] if isinstance(container, str) else container,
|
|
537
|
+
direct_descendants_only=direct_descendants_only,
|
|
538
|
+
)
|
|
539
|
+
|
|
471
540
|
@staticmethod
|
|
472
541
|
def env(env: Union[str, Sequence[str]], /) -> _EnvFilter:
|
|
473
542
|
return _EnvFilter(env=[env] if isinstance(env, str) else env)
|
datahub/sql_parsing/sql_parsing_aggregator.py
CHANGED
|
@@ -49,6 +49,7 @@ from datahub.sql_parsing.sqlglot_lineage import (
|
|
|
49
49
|
sqlglot_lineage,
|
|
50
50
|
)
|
|
51
51
|
from datahub.sql_parsing.sqlglot_utils import (
|
|
52
|
+
DialectOrStr,
|
|
52
53
|
_parse_statement,
|
|
53
54
|
get_query_fingerprint,
|
|
54
55
|
try_format_query,
|
|
@@ -109,6 +110,7 @@ class ObservedQuery:
|
|
|
109
110
|
default_schema: Optional[str] = None
|
|
110
111
|
query_hash: Optional[str] = None
|
|
111
112
|
usage_multiplier: int = 1
|
|
113
|
+
override_dialect: Optional[DialectOrStr] = None
|
|
112
114
|
|
|
113
115
|
# Use this to store additional key-value information about the query for debugging.
|
|
114
116
|
extra_info: Optional[dict] = None
|
|
@@ -190,6 +192,7 @@ class QueryMetadata:
|
|
|
190
192
|
source=models.QuerySourceClass.SYSTEM,
|
|
191
193
|
created=self.make_created_audit_stamp(),
|
|
192
194
|
lastModified=self.make_last_modified_audit_stamp(),
|
|
195
|
+
origin=self.origin.urn() if self.origin else None,
|
|
193
196
|
)
|
|
194
197
|
|
|
195
198
|
|
|
@@ -833,6 +836,7 @@ class SqlParsingAggregator(Closeable):
|
|
|
833
836
|
session_id=session_id,
|
|
834
837
|
timestamp=observed.timestamp,
|
|
835
838
|
user=observed.user,
|
|
839
|
+
override_dialect=observed.override_dialect,
|
|
836
840
|
)
|
|
837
841
|
if parsed.debug_info.error:
|
|
838
842
|
self.report.observed_query_parse_failures.append(
|
|
@@ -1167,6 +1171,7 @@ class SqlParsingAggregator(Closeable):
|
|
|
1167
1171
|
session_id: str = _MISSING_SESSION_ID,
|
|
1168
1172
|
timestamp: Optional[datetime] = None,
|
|
1169
1173
|
user: Optional[Union[CorpUserUrn, CorpGroupUrn]] = None,
|
|
1174
|
+
override_dialect: Optional[DialectOrStr] = None,
|
|
1170
1175
|
) -> SqlParsingResult:
|
|
1171
1176
|
with self.report.sql_parsing_timer:
|
|
1172
1177
|
parsed = sqlglot_lineage(
|
|
@@ -1174,6 +1179,7 @@ class SqlParsingAggregator(Closeable):
|
|
|
1174
1179
|
schema_resolver=schema_resolver,
|
|
1175
1180
|
default_db=default_db,
|
|
1176
1181
|
default_schema=default_schema,
|
|
1182
|
+
override_dialect=override_dialect,
|
|
1177
1183
|
)
|
|
1178
1184
|
self.report.num_sql_parsed += 1
|
|
1179
1185
|
|
datahub/telemetry/telemetry.py
CHANGED
|
@@ -272,7 +272,10 @@ class Telemetry:
|
|
|
272
272
|
if self.sentry_enabled:
|
|
273
273
|
import sentry_sdk
|
|
274
274
|
|
|
275
|
-
sentry_sdk.set_tags(properties)
|
|
275
|
+
# Note: once we're on sentry-sdk 2.1.0+, we can use sentry_sdk.set_tags(properties)
|
|
276
|
+
# See https://github.com/getsentry/sentry-python/commit/6c960d752c7c7aff3fd7469d2e9ad98f19663aa8
|
|
277
|
+
for key, value in properties.items():
|
|
278
|
+
sentry_sdk.set_tag(key, value)
|
|
276
279
|
|
|
277
280
|
def init_capture_exception(self) -> None:
|
|
278
281
|
if self.sentry_enabled:
|
datahub/upgrade/upgrade.py
CHANGED
|
@@ -352,9 +352,11 @@ def _maybe_print_upgrade_message(
|
|
|
352
352
|
if version_stats.client.latest
|
|
353
353
|
else None
|
|
354
354
|
)
|
|
355
|
-
client_server_compat = is_client_server_compatible(
|
|
356
|
-
version_stats.client.current, version_stats.server.current
|
|
357
|
-
)
|
|
355
|
+
client_server_compat = 0
|
|
356
|
+
if version_stats.server.current_server_type != "cloud":
|
|
357
|
+
client_server_compat = is_client_server_compatible(
|
|
358
|
+
version_stats.client.current, version_stats.server.current
|
|
359
|
+
)
|
|
358
360
|
|
|
359
361
|
if latest_release_date and current_release_date:
|
|
360
362
|
assert version_stats.client.latest
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|