acryl-datahub 1.2.0.2rc2__py3-none-any.whl → 1.2.0.2rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/METADATA +2569 -2567
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/RECORD +39 -31
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +13 -1
- datahub/ingestion/autogenerated/capability_summary.json +97 -6
- datahub/ingestion/source/aws/glue.py +8 -0
- datahub/ingestion/source/cassandra/cassandra.py +5 -7
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/datahub/datahub_source.py +3 -0
- datahub/ingestion/source/delta_lake/source.py +1 -0
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +120 -0
- datahub/ingestion/source/grafana/report.py +91 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/hex.py +8 -0
- datahub/ingestion/source/looker/looker_source.py +9 -0
- datahub/ingestion/source/looker/lookml_source.py +8 -0
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/redshift/redshift.py +8 -1
- datahub/ingestion/source/s3/source.py +9 -1
- datahub/ingestion/source/sql/athena.py +8 -2
- datahub/ingestion/source/sql/clickhouse.py +9 -0
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/metadata/_internal_schema_classes.py +18 -3
- datahub/metadata/schema.avsc +10 -1
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +10 -1
- datahub/sdk/dataset.py +44 -0
- datahub/sdk/search_filters.py +34 -14
- datahub/sql_parsing/sql_parsing_aggregator.py +5 -0
- datahub/telemetry/telemetry.py +4 -1
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/top_level.txt +0 -0
|
datahub/metadata/_internal_schema_classes.py
CHANGED
|
@@ -20163,23 +20163,24 @@ class DataHubPageModuleVisibilityClass(DictWrapper):
|
|
|
20163
20163
|
|
|
20164
20164
|
|
|
20165
20165
|
class HierarchyModuleParamsClass(DictWrapper):
|
|
20166
|
-
"""The params required if the module is type HIERARCHY_VIEW
|
|
20167
|
-
TODO: add filters
|
|
20168
|
-
relatedEntitiesFilter: optional Filter"""
|
|
20166
|
+
"""The params required if the module is type HIERARCHY_VIEW"""
|
|
20169
20167
|
|
|
20170
20168
|
RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.module.HierarchyModuleParams")
|
|
20171
20169
|
def __init__(self,
|
|
20172
20170
|
showRelatedEntities: bool,
|
|
20173
20171
|
assetUrns: Union[None, List[str]]=None,
|
|
20172
|
+
relatedEntitiesFilterJson: Union[None, str]=None,
|
|
20174
20173
|
):
|
|
20175
20174
|
super().__init__()
|
|
20176
20175
|
|
|
20177
20176
|
self.assetUrns = assetUrns
|
|
20178
20177
|
self.showRelatedEntities = showRelatedEntities
|
|
20178
|
+
self.relatedEntitiesFilterJson = relatedEntitiesFilterJson
|
|
20179
20179
|
|
|
20180
20180
|
def _restore_defaults(self) -> None:
|
|
20181
20181
|
self.assetUrns = self.RECORD_SCHEMA.fields_dict["assetUrns"].default
|
|
20182
20182
|
self.showRelatedEntities = bool()
|
|
20183
|
+
self.relatedEntitiesFilterJson = self.RECORD_SCHEMA.fields_dict["relatedEntitiesFilterJson"].default
|
|
20183
20184
|
|
|
20184
20185
|
|
|
20185
20186
|
@property
|
|
@@ -20202,6 +20203,20 @@ class HierarchyModuleParamsClass(DictWrapper):
|
|
|
20202
20203
|
self._inner_dict['showRelatedEntities'] = value
|
|
20203
20204
|
|
|
20204
20205
|
|
|
20206
|
+
@property
|
|
20207
|
+
def relatedEntitiesFilterJson(self) -> Union[None, str]:
|
|
20208
|
+
"""Optional filters to filter relatedEntities (assetUrns) out
|
|
20209
|
+
|
|
20210
|
+
The stringified json representing the logical predicate built in the UI to select assets.
|
|
20211
|
+
This predicate is turned into orFilters to send through graphql since graphql doesn't support
|
|
20212
|
+
arbitrary nesting. This string is used to restore the UI for this logical predicate."""
|
|
20213
|
+
return self._inner_dict.get('relatedEntitiesFilterJson') # type: ignore
|
|
20214
|
+
|
|
20215
|
+
@relatedEntitiesFilterJson.setter
|
|
20216
|
+
def relatedEntitiesFilterJson(self, value: Union[None, str]) -> None:
|
|
20217
|
+
self._inner_dict['relatedEntitiesFilterJson'] = value
|
|
20218
|
+
|
|
20219
|
+
|
|
20205
20220
|
class LinkModuleParamsClass(DictWrapper):
|
|
20206
20221
|
# No docs available.
|
|
20207
20222
|
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -17844,9 +17844,18 @@
|
|
|
17844
17844
|
{
|
|
17845
17845
|
"type": "boolean",
|
|
17846
17846
|
"name": "showRelatedEntities"
|
|
17847
|
+
},
|
|
17848
|
+
{
|
|
17849
|
+
"type": [
|
|
17850
|
+
"null",
|
|
17851
|
+
"string"
|
|
17852
|
+
],
|
|
17853
|
+
"name": "relatedEntitiesFilterJson",
|
|
17854
|
+
"default": null,
|
|
17855
|
+
"doc": "Optional filters to filter relatedEntities (assetUrns) out\n\nThe stringified json representing the logical predicate built in the UI to select assets.\nThis predicate is turned into orFilters to send through graphql since graphql doesn't support\narbitrary nesting. This string is used to restore the UI for this logical predicate."
|
|
17847
17856
|
}
|
|
17848
17857
|
],
|
|
17849
|
-
"doc": "The params required if the module is type HIERARCHY_VIEW\n\nTODO: add filters\nrelatedEntitiesFilter: optional Filter"
|
|
17858
|
+
"doc": "The params required if the module is type HIERARCHY_VIEW"
|
|
17850
17859
|
}
|
|
17851
17860
|
],
|
|
17852
17861
|
"name": "hierarchyViewParams",
|
|
datahub/metadata/schemas/DataHubPageModuleProperties.avsc
CHANGED
|
@@ -181,9 +181,18 @@
|
|
|
181
181
|
{
|
|
182
182
|
"type": "boolean",
|
|
183
183
|
"name": "showRelatedEntities"
|
|
184
|
+
},
|
|
185
|
+
{
|
|
186
|
+
"type": [
|
|
187
|
+
"null",
|
|
188
|
+
"string"
|
|
189
|
+
],
|
|
190
|
+
"name": "relatedEntitiesFilterJson",
|
|
191
|
+
"default": null,
|
|
192
|
+
"doc": "Optional filters to filter relatedEntities (assetUrns) out\n\nThe stringified json representing the logical predicate built in the UI to select assets.\nThis predicate is turned into orFilters to send through graphql since graphql doesn't support\narbitrary nesting. This string is used to restore the UI for this logical predicate."
|
|
184
193
|
}
|
|
185
194
|
],
|
|
186
|
-
"doc": "The params required if the module is type HIERARCHY_VIEW\n\nTODO: add filters\nrelatedEntitiesFilter: optional Filter"
|
|
195
|
+
"doc": "The params required if the module is type HIERARCHY_VIEW"
|
|
187
196
|
}
|
|
188
197
|
],
|
|
189
198
|
"name": "hierarchyViewParams",
|
datahub/sdk/dataset.py
CHANGED
|
@@ -72,6 +72,11 @@ UpstreamLineageInputType: TypeAlias = Union[
|
|
|
72
72
|
Dict[DatasetUrnOrStr, ColumnLineageMapping],
|
|
73
73
|
]
|
|
74
74
|
|
|
75
|
+
ViewDefinitionInputType: TypeAlias = Union[
|
|
76
|
+
str,
|
|
77
|
+
models.ViewPropertiesClass,
|
|
78
|
+
]
|
|
79
|
+
|
|
75
80
|
|
|
76
81
|
def _parse_upstream_input(
|
|
77
82
|
upstream_input: UpstreamInputType,
|
|
@@ -467,6 +472,7 @@ class Dataset(
|
|
|
467
472
|
custom_properties: Optional[Dict[str, str]] = None,
|
|
468
473
|
created: Optional[datetime] = None,
|
|
469
474
|
last_modified: Optional[datetime] = None,
|
|
475
|
+
view_definition: Optional[ViewDefinitionInputType] = None,
|
|
470
476
|
# Standard aspects.
|
|
471
477
|
parent_container: ParentContainerInputType | Unset = unset,
|
|
472
478
|
subtype: Optional[str] = None,
|
|
@@ -495,6 +501,7 @@ class Dataset(
|
|
|
495
501
|
custom_properties: Optional dictionary of custom properties.
|
|
496
502
|
created: Optional creation timestamp.
|
|
497
503
|
last_modified: Optional last modification timestamp.
|
|
504
|
+
view_definition: Optional view definition for the dataset.
|
|
498
505
|
parent_container: Optional parent container for this dataset.
|
|
499
506
|
subtype: Optional subtype of the dataset.
|
|
500
507
|
owners: Optional list of owners.
|
|
@@ -536,6 +543,8 @@ class Dataset(
|
|
|
536
543
|
self.set_created(created)
|
|
537
544
|
if last_modified is not None:
|
|
538
545
|
self.set_last_modified(last_modified)
|
|
546
|
+
if view_definition is not None:
|
|
547
|
+
self.set_view_definition(view_definition)
|
|
539
548
|
|
|
540
549
|
if parent_container is not unset:
|
|
541
550
|
self._set_container(parent_container)
|
|
@@ -717,6 +726,41 @@ class Dataset(
|
|
|
717
726
|
def set_last_modified(self, last_modified: datetime) -> None:
|
|
718
727
|
self._ensure_dataset_props().lastModified = make_time_stamp(last_modified)
|
|
719
728
|
|
|
729
|
+
@property
|
|
730
|
+
def view_definition(self) -> Optional[models.ViewPropertiesClass]:
|
|
731
|
+
"""Get the view definition of the dataset.
|
|
732
|
+
|
|
733
|
+
Under typical usage, this will be present if the subtype is "View".
|
|
734
|
+
|
|
735
|
+
Returns:
|
|
736
|
+
The view definition if set, None otherwise.
|
|
737
|
+
"""
|
|
738
|
+
return self._get_aspect(models.ViewPropertiesClass)
|
|
739
|
+
|
|
740
|
+
def set_view_definition(self, view_definition: ViewDefinitionInputType) -> None:
|
|
741
|
+
"""Set the view definition of the dataset.
|
|
742
|
+
|
|
743
|
+
If you're setting a view definition, subtype should typically be set to "view".
|
|
744
|
+
|
|
745
|
+
If a string is provided, it will be treated as a SQL view definition. To set
|
|
746
|
+
a custom language or other properties, provide a ViewPropertiesClass object.
|
|
747
|
+
|
|
748
|
+
Args:
|
|
749
|
+
view_definition: The view definition to set.
|
|
750
|
+
"""
|
|
751
|
+
if isinstance(view_definition, models.ViewPropertiesClass):
|
|
752
|
+
self._set_aspect(view_definition)
|
|
753
|
+
elif isinstance(view_definition, str):
|
|
754
|
+
self._set_aspect(
|
|
755
|
+
models.ViewPropertiesClass(
|
|
756
|
+
materialized=False,
|
|
757
|
+
viewLogic=view_definition,
|
|
758
|
+
viewLanguage="SQL",
|
|
759
|
+
)
|
|
760
|
+
)
|
|
761
|
+
else:
|
|
762
|
+
assert_never(view_definition)
|
|
763
|
+
|
|
720
764
|
def _schema_dict(self) -> Dict[str, models.SchemaFieldClass]:
|
|
721
765
|
schema_metadata = self._get_aspect(models.SchemaMetadataClass)
|
|
722
766
|
if schema_metadata is None:
|
datahub/sdk/search_filters.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
|
+
import json
|
|
4
5
|
from typing import (
|
|
5
6
|
TYPE_CHECKING,
|
|
6
7
|
Annotated,
|
|
@@ -406,26 +407,45 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
|
|
|
406
407
|
else:
|
|
407
408
|
from pydantic import Discriminator, Tag
|
|
408
409
|
|
|
410
|
+
def _parse_json_from_string(value: Any) -> Any:
|
|
411
|
+
if isinstance(value, str):
|
|
412
|
+
try:
|
|
413
|
+
return json.loads(value)
|
|
414
|
+
except json.JSONDecodeError:
|
|
415
|
+
return value
|
|
416
|
+
else:
|
|
417
|
+
return value
|
|
418
|
+
|
|
409
419
|
# TODO: Once we're fully on pydantic 2, we can use a RootModel here.
|
|
410
420
|
# That way we'd be able to attach methods to the Filter type.
|
|
411
421
|
# e.g. replace load_filters(...) with Filter.load(...)
|
|
412
422
|
Filter = Annotated[
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
423
|
+
Annotated[
|
|
424
|
+
Union[
|
|
425
|
+
Annotated[_And, Tag(_And._field_discriminator())],
|
|
426
|
+
Annotated[_Or, Tag(_Or._field_discriminator())],
|
|
427
|
+
Annotated[_Not, Tag(_Not._field_discriminator())],
|
|
428
|
+
Annotated[
|
|
429
|
+
_EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())
|
|
430
|
+
],
|
|
431
|
+
Annotated[
|
|
432
|
+
_EntitySubtypeFilter,
|
|
433
|
+
Tag(_EntitySubtypeFilter._field_discriminator()),
|
|
434
|
+
],
|
|
435
|
+
Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
|
|
436
|
+
Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
|
|
437
|
+
Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
|
|
438
|
+
Annotated[
|
|
439
|
+
_ContainerFilter, Tag(_ContainerFilter._field_discriminator())
|
|
440
|
+
],
|
|
441
|
+
Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
|
|
442
|
+
Annotated[
|
|
443
|
+
_CustomCondition, Tag(_CustomCondition._field_discriminator())
|
|
444
|
+
],
|
|
420
445
|
],
|
|
421
|
-
|
|
422
|
-
Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
|
|
423
|
-
Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
|
|
424
|
-
Annotated[_ContainerFilter, Tag(_ContainerFilter._field_discriminator())],
|
|
425
|
-
Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
|
|
426
|
-
Annotated[_CustomCondition, Tag(_CustomCondition._field_discriminator())],
|
|
446
|
+
Discriminator(_filter_discriminator),
|
|
427
447
|
],
|
|
428
|
-
|
|
448
|
+
pydantic.BeforeValidator(_parse_json_from_string),
|
|
429
449
|
]
|
|
430
450
|
|
|
431
451
|
# Required to resolve forward references to "Filter"
|
|
datahub/sql_parsing/sql_parsing_aggregator.py
CHANGED
|
@@ -49,6 +49,7 @@ from datahub.sql_parsing.sqlglot_lineage import (
|
|
|
49
49
|
sqlglot_lineage,
|
|
50
50
|
)
|
|
51
51
|
from datahub.sql_parsing.sqlglot_utils import (
|
|
52
|
+
DialectOrStr,
|
|
52
53
|
_parse_statement,
|
|
53
54
|
get_query_fingerprint,
|
|
54
55
|
try_format_query,
|
|
@@ -109,6 +110,7 @@ class ObservedQuery:
|
|
|
109
110
|
default_schema: Optional[str] = None
|
|
110
111
|
query_hash: Optional[str] = None
|
|
111
112
|
usage_multiplier: int = 1
|
|
113
|
+
override_dialect: Optional[DialectOrStr] = None
|
|
112
114
|
|
|
113
115
|
# Use this to store additional key-value information about the query for debugging.
|
|
114
116
|
extra_info: Optional[dict] = None
|
|
@@ -834,6 +836,7 @@ class SqlParsingAggregator(Closeable):
|
|
|
834
836
|
session_id=session_id,
|
|
835
837
|
timestamp=observed.timestamp,
|
|
836
838
|
user=observed.user,
|
|
839
|
+
override_dialect=observed.override_dialect,
|
|
837
840
|
)
|
|
838
841
|
if parsed.debug_info.error:
|
|
839
842
|
self.report.observed_query_parse_failures.append(
|
|
@@ -1168,6 +1171,7 @@ class SqlParsingAggregator(Closeable):
|
|
|
1168
1171
|
session_id: str = _MISSING_SESSION_ID,
|
|
1169
1172
|
timestamp: Optional[datetime] = None,
|
|
1170
1173
|
user: Optional[Union[CorpUserUrn, CorpGroupUrn]] = None,
|
|
1174
|
+
override_dialect: Optional[DialectOrStr] = None,
|
|
1171
1175
|
) -> SqlParsingResult:
|
|
1172
1176
|
with self.report.sql_parsing_timer:
|
|
1173
1177
|
parsed = sqlglot_lineage(
|
|
@@ -1175,6 +1179,7 @@ class SqlParsingAggregator(Closeable):
|
|
|
1175
1179
|
schema_resolver=schema_resolver,
|
|
1176
1180
|
default_db=default_db,
|
|
1177
1181
|
default_schema=default_schema,
|
|
1182
|
+
override_dialect=override_dialect,
|
|
1178
1183
|
)
|
|
1179
1184
|
self.report.num_sql_parsed += 1
|
|
1180
1185
|
|
datahub/telemetry/telemetry.py
CHANGED
|
@@ -272,7 +272,10 @@ class Telemetry:
|
|
|
272
272
|
if self.sentry_enabled:
|
|
273
273
|
import sentry_sdk
|
|
274
274
|
|
|
275
|
-
sentry_sdk.set_tags(properties)
|
|
275
|
+
# Note: once we're on sentry-sdk 2.1.0+, we can use sentry_sdk.set_tags(properties)
|
|
276
|
+
# See https://github.com/getsentry/sentry-python/commit/6c960d752c7c7aff3fd7469d2e9ad98f19663aa8
|
|
277
|
+
for key, value in properties.items():
|
|
278
|
+
sentry_sdk.set_tag(key, value)
|
|
276
279
|
|
|
277
280
|
def init_capture_exception(self) -> None:
|
|
278
281
|
if self.sentry_enabled:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|