acryl-datahub 1.2.0.2rc1__py3-none-any.whl → 1.2.0.2rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of acryl-datahub has been flagged as potentially problematic.
Files changed (41)
  1. {acryl_datahub-1.2.0.2rc1.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/METADATA +2707 -2705
  2. {acryl_datahub-1.2.0.2rc1.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/RECORD +41 -33
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataset/dataset.py +13 -1
  5. datahub/ingestion/autogenerated/capability_summary.json +97 -6
  6. datahub/ingestion/source/aws/glue.py +8 -0
  7. datahub/ingestion/source/cassandra/cassandra.py +5 -7
  8. datahub/ingestion/source/common/subtypes.py +2 -0
  9. datahub/ingestion/source/datahub/datahub_source.py +3 -0
  10. datahub/ingestion/source/delta_lake/source.py +1 -0
  11. datahub/ingestion/source/ge_data_profiler.py +9 -1
  12. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  13. datahub/ingestion/source/grafana/field_utils.py +307 -0
  14. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  15. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  16. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  17. datahub/ingestion/source/grafana/lineage.py +202 -0
  18. datahub/ingestion/source/grafana/models.py +120 -0
  19. datahub/ingestion/source/grafana/report.py +91 -0
  20. datahub/ingestion/source/grafana/types.py +16 -0
  21. datahub/ingestion/source/hex/hex.py +8 -0
  22. datahub/ingestion/source/looker/looker_source.py +9 -0
  23. datahub/ingestion/source/looker/lookml_source.py +8 -0
  24. datahub/ingestion/source/mongodb.py +11 -1
  25. datahub/ingestion/source/redshift/redshift.py +8 -1
  26. datahub/ingestion/source/s3/source.py +9 -1
  27. datahub/ingestion/source/sql/athena.py +8 -2
  28. datahub/ingestion/source/sql/clickhouse.py +9 -0
  29. datahub/ingestion/source/sql_queries.py +2 -2
  30. datahub/ingestion/source/unity/proxy.py +8 -8
  31. datahub/metadata/_internal_schema_classes.py +18 -3
  32. datahub/metadata/schema.avsc +10 -1
  33. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +10 -1
  34. datahub/sdk/dataset.py +44 -0
  35. datahub/sdk/search_filters.py +34 -14
  36. datahub/sql_parsing/sql_parsing_aggregator.py +5 -0
  37. datahub/telemetry/telemetry.py +4 -1
  38. {acryl_datahub-1.2.0.2rc1.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/WHEEL +0 -0
  39. {acryl_datahub-1.2.0.2rc1.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/entry_points.txt +0 -0
  40. {acryl_datahub-1.2.0.2rc1.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/licenses/LICENSE +0 -0
  41. {acryl_datahub-1.2.0.2rc1.dist-info → acryl_datahub-1.2.0.2rc3.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/unity/proxy.py CHANGED
@@ -521,9 +521,9 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
     @cached(cachetools.FIFOCache(maxsize=100))
     def get_schema_tags(self, catalog: str) -> Dict[str, List[UnityCatalogTag]]:
         """Optimized version using databricks-sql"""
-        logger.info(f"Fetching schema tags for catalog: {catalog}")
+        logger.info(f"Fetching schema tags for catalog: `{catalog}`")
 
-        query = f"SELECT * FROM {catalog}.information_schema.schema_tags"
+        query = f"SELECT * FROM `{catalog}`.information_schema.schema_tags"
 
         rows = self._execute_sql_query(query)
 
         result_dict: Dict[str, List[UnityCatalogTag]] = {}
@@ -544,9 +544,9 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
     @cached(cachetools.FIFOCache(maxsize=100))
     def get_catalog_tags(self, catalog: str) -> Dict[str, List[UnityCatalogTag]]:
         """Optimized version using databricks-sql"""
-        logger.info(f"Fetching table tags for catalog: {catalog}")
+        logger.info(f"Fetching table tags for catalog: `{catalog}`")
 
-        query = f"SELECT * FROM {catalog}.information_schema.catalog_tags"
+        query = f"SELECT * FROM `{catalog}`.information_schema.catalog_tags"
 
         rows = self._execute_sql_query(query)
 
         result_dict: Dict[str, List[UnityCatalogTag]] = {}
@@ -566,9 +566,9 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
     @cached(cachetools.FIFOCache(maxsize=100))
     def get_table_tags(self, catalog: str) -> Dict[str, List[UnityCatalogTag]]:
         """Optimized version using databricks-sql"""
-        logger.info(f"Fetching table tags for catalog: {catalog}")
+        logger.info(f"Fetching table tags for catalog: `{catalog}`")
 
-        query = f"SELECT * FROM {catalog}.information_schema.table_tags"
+        query = f"SELECT * FROM `{catalog}`.information_schema.table_tags"
 
         rows = self._execute_sql_query(query)
 
         result_dict: Dict[str, List[UnityCatalogTag]] = {}
@@ -589,9 +589,9 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
     @cached(cachetools.FIFOCache(maxsize=100))
     def get_column_tags(self, catalog: str) -> Dict[str, List[UnityCatalogTag]]:
         """Optimized version using databricks-sql"""
-        logger.info(f"Fetching column tags for catalog: {catalog}")
+        logger.info(f"Fetching column tags for catalog: `{catalog}`")
 
-        query = f"SELECT * FROM {catalog}.information_schema.column_tags"
+        query = f"SELECT * FROM `{catalog}`.information_schema.column_tags"
 
         rows = self._execute_sql_query(query)
 
         result_dict: Dict[str, List[UnityCatalogTag]] = {}
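All four hunks make the same change: the interpolated catalog name is wrapped in backticks, so catalog identifiers containing hyphens, spaces, or reserved words no longer break the `information_schema` queries. A minimal sketch of why the quoting matters; the `quote_identifier` helper is illustrative, not part of the package:

```python
# Hypothetical helper showing the effect of the backtick quoting above.
# Spark/Databricks SQL escapes a literal backtick inside a quoted
# identifier by doubling it.
def quote_identifier(name: str) -> str:
    return "`" + name.replace("`", "``") + "`"

# Unquoted, a hyphenated catalog name would be parsed as an expression
# (`my - catalog`) and fail; quoted, it is a single identifier.
query = f"SELECT * FROM {quote_identifier('my-catalog')}.information_schema.schema_tags"
assert query == "SELECT * FROM `my-catalog`.information_schema.schema_tags"
```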
datahub/metadata/_internal_schema_classes.py CHANGED
@@ -20163,23 +20163,24 @@ class DataHubPageModuleVisibilityClass(DictWrapper):
 
 
 class HierarchyModuleParamsClass(DictWrapper):
-    """The params required if the module is type HIERARCHY_VIEW
-    TODO: add filters
-    relatedEntitiesFilter: optional Filter"""
+    """The params required if the module is type HIERARCHY_VIEW"""
 
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.module.HierarchyModuleParams")
     def __init__(self,
         showRelatedEntities: bool,
         assetUrns: Union[None, List[str]]=None,
+        relatedEntitiesFilterJson: Union[None, str]=None,
     ):
         super().__init__()
 
         self.assetUrns = assetUrns
         self.showRelatedEntities = showRelatedEntities
+        self.relatedEntitiesFilterJson = relatedEntitiesFilterJson
 
     def _restore_defaults(self) -> None:
         self.assetUrns = self.RECORD_SCHEMA.fields_dict["assetUrns"].default
         self.showRelatedEntities = bool()
+        self.relatedEntitiesFilterJson = self.RECORD_SCHEMA.fields_dict["relatedEntitiesFilterJson"].default
 
 
     @property
@@ -20202,6 +20203,20 @@ class HierarchyModuleParamsClass(DictWrapper):
         self._inner_dict['showRelatedEntities'] = value
 
 
+    @property
+    def relatedEntitiesFilterJson(self) -> Union[None, str]:
+        """Optional filters to filter relatedEntities (assetUrns) out
+
+        The stringified json representing the logical predicate built in the UI to select assets.
+        This predicate is turned into orFilters to send through graphql since graphql doesn't support
+        arbitrary nesting. This string is used to restore the UI for this logical predicate."""
+        return self._inner_dict.get('relatedEntitiesFilterJson')  # type: ignore
+
+    @relatedEntitiesFilterJson.setter
+    def relatedEntitiesFilterJson(self, value: Union[None, str]) -> None:
+        self._inner_dict['relatedEntitiesFilterJson'] = value
+
+
 class LinkModuleParamsClass(DictWrapper):
     # No docs available.
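HierarchyModuleParamsClass gains an optional relatedEntitiesFilterJson field that carries a stringified UI predicate. A brief usage sketch, assuming the generated class is importable from datahub.metadata.schema_classes; the predicate payload below is illustrative, since the field stores an opaque JSON string:

```python
import json

from datahub.metadata.schema_classes import HierarchyModuleParamsClass

# The JSON shape is not prescribed by this diff; the UI uses the string
# to restore its filter builder.
params = HierarchyModuleParamsClass(
    showRelatedEntities=True,
    assetUrns=["urn:li:container:example"],
    relatedEntitiesFilterJson=json.dumps(
        {"operator": "AND", "operands": [{"field": "platform", "values": ["snowflake"]}]}
    ),
)

# The field defaults to None, so existing callers are unaffected.
assert HierarchyModuleParamsClass(showRelatedEntities=False).relatedEntitiesFilterJson is None
```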
datahub/metadata/schema.avsc CHANGED
@@ -17844,9 +17844,18 @@
                 {
                     "type": "boolean",
                     "name": "showRelatedEntities"
+                },
+                {
+                    "type": [
+                        "null",
+                        "string"
+                    ],
+                    "name": "relatedEntitiesFilterJson",
+                    "default": null,
+                    "doc": "Optional filters to filter relatedEntities (assetUrns) out\n\nThe stringified json representing the logical predicate built in the UI to select assets.\nThis predicate is turned into orFilters to send through graphql since graphql doesn't support\narbitrary nesting. This string is used to restore the UI for this logical predicate."
                 }
             ],
-            "doc": "The params required if the module is type HIERARCHY_VIEW\nTODO: add filters\nrelatedEntitiesFilter: optional Filter"
+            "doc": "The params required if the module is type HIERARCHY_VIEW"
         }
     ],
     "name": "hierarchyViewParams",
datahub/metadata/schemas/DataHubPageModuleProperties.avsc CHANGED
@@ -181,9 +181,18 @@
                 {
                     "type": "boolean",
                     "name": "showRelatedEntities"
+                },
+                {
+                    "type": [
+                        "null",
+                        "string"
+                    ],
+                    "name": "relatedEntitiesFilterJson",
+                    "default": null,
+                    "doc": "Optional filters to filter relatedEntities (assetUrns) out\n\nThe stringified json representing the logical predicate built in the UI to select assets.\nThis predicate is turned into orFilters to send through graphql since graphql doesn't support\narbitrary nesting. This string is used to restore the UI for this logical predicate."
                 }
             ],
-            "doc": "The params required if the module is type HIERARCHY_VIEW\nTODO: add filters\nrelatedEntitiesFilter: optional Filter"
+            "doc": "The params required if the module is type HIERARCHY_VIEW"
         }
     ],
     "name": "hierarchyViewParams",
datahub/sdk/dataset.py CHANGED
@@ -72,6 +72,11 @@ UpstreamLineageInputType: TypeAlias = Union[
     Dict[DatasetUrnOrStr, ColumnLineageMapping],
 ]
 
+ViewDefinitionInputType: TypeAlias = Union[
+    str,
+    models.ViewPropertiesClass,
+]
+
 
 def _parse_upstream_input(
     upstream_input: UpstreamInputType,
@@ -467,6 +472,7 @@ class Dataset(
         custom_properties: Optional[Dict[str, str]] = None,
         created: Optional[datetime] = None,
         last_modified: Optional[datetime] = None,
+        view_definition: Optional[ViewDefinitionInputType] = None,
         # Standard aspects.
         parent_container: ParentContainerInputType | Unset = unset,
         subtype: Optional[str] = None,
@@ -495,6 +501,7 @@ class Dataset(
             custom_properties: Optional dictionary of custom properties.
             created: Optional creation timestamp.
             last_modified: Optional last modification timestamp.
+            view_definition: Optional view definition for the dataset.
             parent_container: Optional parent container for this dataset.
             subtype: Optional subtype of the dataset.
             owners: Optional list of owners.
@@ -536,6 +543,8 @@ class Dataset(
             self.set_created(created)
         if last_modified is not None:
             self.set_last_modified(last_modified)
+        if view_definition is not None:
+            self.set_view_definition(view_definition)
 
         if parent_container is not unset:
             self._set_container(parent_container)
@@ -717,6 +726,41 @@ class Dataset(
     def set_last_modified(self, last_modified: datetime) -> None:
         self._ensure_dataset_props().lastModified = make_time_stamp(last_modified)
 
+    @property
+    def view_definition(self) -> Optional[models.ViewPropertiesClass]:
+        """Get the view definition of the dataset.
+
+        Under typical usage, this will be present if the subtype is "View".
+
+        Returns:
+            The view definition if set, None otherwise.
+        """
+        return self._get_aspect(models.ViewPropertiesClass)
+
+    def set_view_definition(self, view_definition: ViewDefinitionInputType) -> None:
+        """Set the view definition of the dataset.
+
+        If you're setting a view definition, subtype should typically be set to "view".
+
+        If a string is provided, it will be treated as a SQL view definition. To set
+        a custom language or other properties, provide a ViewPropertiesClass object.
+
+        Args:
+            view_definition: The view definition to set.
+        """
+        if isinstance(view_definition, models.ViewPropertiesClass):
+            self._set_aspect(view_definition)
+        elif isinstance(view_definition, str):
+            self._set_aspect(
+                models.ViewPropertiesClass(
+                    materialized=False,
+                    viewLogic=view_definition,
+                    viewLanguage="SQL",
+                )
+            )
+        else:
+            assert_never(view_definition)
+
     def _schema_dict(self) -> Dict[str, models.SchemaFieldClass]:
         schema_metadata = self._get_aspect(models.SchemaMetadataClass)
         if schema_metadata is None:
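With this addition, a view's SQL can be attached either at construction time or later via set_view_definition. A brief usage sketch, assuming Dataset is importable from datahub.sdk and the generated models from datahub.metadata.schema_classes:

```python
from datahub.metadata import schema_classes as models
from datahub.sdk import Dataset

# Passing a plain string yields a non-materialized SQL view definition.
view = Dataset(
    platform="snowflake",
    name="analytics.public.active_users",
    subtype="View",
    view_definition="SELECT id, email FROM users WHERE active",
)
assert view.view_definition is not None
assert view.view_definition.viewLanguage == "SQL"

# For another language or a materialized view, pass ViewPropertiesClass directly.
view.set_view_definition(
    models.ViewPropertiesClass(
        materialized=True,
        viewLogic="SELECT id, email FROM users WHERE active",
        viewLanguage="HQL",
    )
)
```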
datahub/sdk/search_filters.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import abc
+import json
 from typing import (
     TYPE_CHECKING,
     Annotated,
@@ -406,26 +407,45 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
 else:
     from pydantic import Discriminator, Tag
 
+    def _parse_json_from_string(value: Any) -> Any:
+        if isinstance(value, str):
+            try:
+                return json.loads(value)
+            except json.JSONDecodeError:
+                return value
+        else:
+            return value
+
     # TODO: Once we're fully on pydantic 2, we can use a RootModel here.
     # That way we'd be able to attach methods to the Filter type.
     # e.g. replace load_filters(...) with Filter.load(...)
     Filter = Annotated[
-        Union[
-            Annotated[_And, Tag(_And._field_discriminator())],
-            Annotated[_Or, Tag(_Or._field_discriminator())],
-            Annotated[_Not, Tag(_Not._field_discriminator())],
-            Annotated[_EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())],
-            Annotated[
-                _EntitySubtypeFilter, Tag(_EntitySubtypeFilter._field_discriminator())
+        Annotated[
+            Union[
+                Annotated[_And, Tag(_And._field_discriminator())],
+                Annotated[_Or, Tag(_Or._field_discriminator())],
+                Annotated[_Not, Tag(_Not._field_discriminator())],
+                Annotated[
+                    _EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())
+                ],
+                Annotated[
+                    _EntitySubtypeFilter,
+                    Tag(_EntitySubtypeFilter._field_discriminator()),
+                ],
+                Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
+                Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
+                Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
+                Annotated[
+                    _ContainerFilter, Tag(_ContainerFilter._field_discriminator())
+                ],
+                Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
+                Annotated[
+                    _CustomCondition, Tag(_CustomCondition._field_discriminator())
+                ],
             ],
-            Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
-            Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
-            Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
-            Annotated[_ContainerFilter, Tag(_ContainerFilter._field_discriminator())],
-            Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
-            Annotated[_CustomCondition, Tag(_CustomCondition._field_discriminator())],
+            Discriminator(_filter_discriminator),
         ],
-        Discriminator(_filter_discriminator),
+        pydantic.BeforeValidator(_parse_json_from_string),
     ]
 
 # Required to resolve forward references to "Filter"
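The net effect is that a Filter field can now accept a JSON string as well as an already-parsed structure, because the BeforeValidator decodes the string before the discriminated union is validated. A standalone pydantic v2 sketch of that pattern; the Model class here is illustrative, not DataHub code:

```python
import json
from typing import Annotated, Any

import pydantic

def parse_json_from_string(value: Any) -> Any:
    # Decode JSON strings; pass anything else (or invalid JSON) through
    # so the normal validation error still surfaces downstream.
    if isinstance(value, str):
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            return value
    return value

class Model(pydantic.BaseModel):
    payload: Annotated[dict, pydantic.BeforeValidator(parse_json_from_string)]

# Both inputs validate to the same result.
assert Model(payload='{"platform": ["snowflake"]}').payload == {"platform": ["snowflake"]}
assert Model(payload={"platform": ["snowflake"]}).payload == {"platform": ["snowflake"]}
```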
datahub/sql_parsing/sql_parsing_aggregator.py CHANGED
@@ -49,6 +49,7 @@ from datahub.sql_parsing.sqlglot_lineage import (
     sqlglot_lineage,
 )
 from datahub.sql_parsing.sqlglot_utils import (
+    DialectOrStr,
     _parse_statement,
     get_query_fingerprint,
     try_format_query,
@@ -109,6 +110,7 @@ class ObservedQuery:
     default_schema: Optional[str] = None
     query_hash: Optional[str] = None
     usage_multiplier: int = 1
+    override_dialect: Optional[DialectOrStr] = None
 
     # Use this to store additional key-value information about the query for debugging.
     extra_info: Optional[dict] = None
@@ -834,6 +836,7 @@ class SqlParsingAggregator(Closeable):
             session_id=session_id,
             timestamp=observed.timestamp,
             user=observed.user,
+            override_dialect=observed.override_dialect,
         )
         if parsed.debug_info.error:
             self.report.observed_query_parse_failures.append(
@@ -1168,6 +1171,7 @@ class SqlParsingAggregator(Closeable):
         session_id: str = _MISSING_SESSION_ID,
         timestamp: Optional[datetime] = None,
         user: Optional[Union[CorpUserUrn, CorpGroupUrn]] = None,
+        override_dialect: Optional[DialectOrStr] = None,
     ) -> SqlParsingResult:
         with self.report.sql_parsing_timer:
             parsed = sqlglot_lineage(
@@ -1175,6 +1179,7 @@ class SqlParsingAggregator(Closeable):
                 schema_resolver=schema_resolver,
                 default_db=default_db,
                 default_schema=default_schema,
+                override_dialect=override_dialect,
            )
         self.report.num_sql_parsed += 1
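ObservedQuery now carries an optional override_dialect, threaded through to sqlglot_lineage so a query can be parsed with an explicit sqlglot dialect instead of the one inferred from the platform. A minimal sketch; the field values are illustrative, and per the DialectOrStr type either a dialect name string or a sqlglot Dialect instance should be accepted:

```python
from datahub.sql_parsing.sql_parsing_aggregator import ObservedQuery

# Force T-SQL parsing for a query whose platform default dialect
# would otherwise be used.
observed = ObservedQuery(
    query="SELECT TOP 10 * FROM dbo.orders",
    default_db="sales",
    override_dialect="tsql",  # any sqlglot dialect name, or a Dialect instance
)
```

The observed query is then handed to the aggregator through its existing add/observe entry point, which is what forwards override_dialect into the parse call shown above.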
datahub/telemetry/telemetry.py CHANGED
@@ -272,7 +272,10 @@ class Telemetry:
         if self.sentry_enabled:
             import sentry_sdk
 
-            sentry_sdk.set_tags(properties)
+            # Note: once we're on sentry-sdk 2.1.0+, we can use sentry_sdk.set_tags(properties)
+            # See https://github.com/getsentry/sentry-python/commit/6c960d752c7c7aff3fd7469d2e9ad98f19663aa8
+            for key, value in properties.items():
+                sentry_sdk.set_tag(key, value)
 
     def init_capture_exception(self) -> None:
         if self.sentry_enabled:
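The batch set_tags helper only landed in sentry-sdk 2.1.0, so the per-tag loop keeps older pins working. If feature detection were preferred over an unconditional loop, a hedged alternative (not what the package does) could look like this; the tag values are illustrative:

```python
import sentry_sdk

properties = {"datahub_version": "1.2.0.2rc3", "os": "linux"}  # illustrative tags

if hasattr(sentry_sdk, "set_tags"):  # batch helper, sentry-sdk >= 2.1.0
    sentry_sdk.set_tags(properties)
else:  # fall back to the per-tag API available on older versions
    for key, value in properties.items():
        sentry_sdk.set_tag(key, value)
```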