acryl-datahub 1.2.0.1__py3-none-any.whl → 1.2.0.2__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub may be problematic.

Files changed (54)
  1. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/METADATA +2574 -2572
  2. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/RECORD +54 -46
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataset/dataset.py +13 -1
  5. datahub/emitter/rest_emitter.py +3 -1
  6. datahub/ingestion/autogenerated/capability_summary.json +97 -6
  7. datahub/ingestion/source/abs/source.py +5 -29
  8. datahub/ingestion/source/aws/glue.py +8 -0
  9. datahub/ingestion/source/cassandra/cassandra.py +5 -7
  10. datahub/ingestion/source/common/subtypes.py +2 -0
  11. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  12. datahub/ingestion/source/datahub/datahub_source.py +3 -0
  13. datahub/ingestion/source/dbt/dbt_common.py +69 -2
  14. datahub/ingestion/source/delta_lake/source.py +1 -0
  15. datahub/ingestion/source/ge_data_profiler.py +9 -1
  16. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  17. datahub/ingestion/source/grafana/field_utils.py +307 -0
  18. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  19. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  20. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  21. datahub/ingestion/source/grafana/lineage.py +202 -0
  22. datahub/ingestion/source/grafana/models.py +120 -0
  23. datahub/ingestion/source/grafana/report.py +91 -0
  24. datahub/ingestion/source/grafana/types.py +16 -0
  25. datahub/ingestion/source/hex/hex.py +8 -0
  26. datahub/ingestion/source/looker/looker_common.py +40 -4
  27. datahub/ingestion/source/looker/looker_source.py +9 -0
  28. datahub/ingestion/source/looker/lookml_source.py +8 -0
  29. datahub/ingestion/source/mongodb.py +11 -1
  30. datahub/ingestion/source/redshift/redshift.py +8 -1
  31. datahub/ingestion/source/s3/source.py +14 -34
  32. datahub/ingestion/source/sql/athena.py +8 -2
  33. datahub/ingestion/source/sql/clickhouse.py +9 -0
  34. datahub/ingestion/source/sql/postgres.py +190 -1
  35. datahub/ingestion/source/sql_queries.py +111 -76
  36. datahub/ingestion/source/unity/proxy.py +8 -8
  37. datahub/metadata/_internal_schema_classes.py +96 -0
  38. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +2 -0
  39. datahub/metadata/schema.avsc +69 -0
  40. datahub/metadata/schemas/CorpUserSettings.avsc +10 -1
  41. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +42 -0
  42. datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -0
  43. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  44. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  45. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  46. datahub/sdk/dataset.py +44 -0
  47. datahub/sdk/search_filters.py +84 -15
  48. datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
  49. datahub/telemetry/telemetry.py +4 -1
  50. datahub/upgrade/upgrade.py +5 -3
  51. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/WHEEL +0 -0
  52. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/entry_points.txt +0 -0
  53. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/licenses/LICENSE +0 -0
  54. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/top_level.txt +0 -0
datahub/sdk/dataset.py CHANGED
@@ -72,6 +72,11 @@ UpstreamLineageInputType: TypeAlias = Union[
     Dict[DatasetUrnOrStr, ColumnLineageMapping],
 ]
 
+ViewDefinitionInputType: TypeAlias = Union[
+    str,
+    models.ViewPropertiesClass,
+]
+
 
 def _parse_upstream_input(
     upstream_input: UpstreamInputType,
@@ -467,6 +472,7 @@ class Dataset(
         custom_properties: Optional[Dict[str, str]] = None,
         created: Optional[datetime] = None,
         last_modified: Optional[datetime] = None,
+        view_definition: Optional[ViewDefinitionInputType] = None,
         # Standard aspects.
         parent_container: ParentContainerInputType | Unset = unset,
         subtype: Optional[str] = None,
@@ -495,6 +501,7 @@ class Dataset(
             custom_properties: Optional dictionary of custom properties.
             created: Optional creation timestamp.
             last_modified: Optional last modification timestamp.
+            view_definition: Optional view definition for the dataset.
             parent_container: Optional parent container for this dataset.
             subtype: Optional subtype of the dataset.
             owners: Optional list of owners.
@@ -536,6 +543,8 @@ class Dataset(
             self.set_created(created)
         if last_modified is not None:
             self.set_last_modified(last_modified)
+        if view_definition is not None:
+            self.set_view_definition(view_definition)
 
         if parent_container is not unset:
             self._set_container(parent_container)
@@ -717,6 +726,41 @@ class Dataset(
     def set_last_modified(self, last_modified: datetime) -> None:
         self._ensure_dataset_props().lastModified = make_time_stamp(last_modified)
 
+    @property
+    def view_definition(self) -> Optional[models.ViewPropertiesClass]:
+        """Get the view definition of the dataset.
+
+        Under typical usage, this will be present if the subtype is "View".
+
+        Returns:
+            The view definition if set, None otherwise.
+        """
+        return self._get_aspect(models.ViewPropertiesClass)
+
+    def set_view_definition(self, view_definition: ViewDefinitionInputType) -> None:
+        """Set the view definition of the dataset.
+
+        If you're setting a view definition, subtype should typically be set to "view".
+
+        If a string is provided, it will be treated as a SQL view definition. To set
+        a custom language or other properties, provide a ViewPropertiesClass object.
+
+        Args:
+            view_definition: The view definition to set.
+        """
+        if isinstance(view_definition, models.ViewPropertiesClass):
+            self._set_aspect(view_definition)
+        elif isinstance(view_definition, str):
+            self._set_aspect(
+                models.ViewPropertiesClass(
+                    materialized=False,
+                    viewLogic=view_definition,
+                    viewLanguage="SQL",
+                )
+            )
+        else:
+            assert_never(view_definition)
+
     def _schema_dict(self) -> Dict[str, models.SchemaFieldClass]:
         schema_metadata = self._get_aspect(models.SchemaMetadataClass)
         if schema_metadata is None:
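
For context, a minimal usage sketch of the new view_definition parameter (the import path follows the datahub.sdk conventions; the platform and name values are illustrative, not taken from this diff):

from datahub.sdk import Dataset

# A plain string is treated as SQL view logic
# (stored as ViewPropertiesClass with materialized=False, viewLanguage="SQL").
dataset = Dataset(
    platform="snowflake",
    name="db.schema.my_view",
    subtype="View",  # recommended whenever a view definition is set
    view_definition="SELECT id, name FROM db.schema.my_table",
)

# For non-SQL or materialized views, pass a ViewPropertiesClass instead:
# dataset.set_view_definition(
#     models.ViewPropertiesClass(materialized=True, viewLogic="...", viewLanguage="SQL")
# )
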
datahub/sdk/search_filters.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import abc
+import json
 from typing import (
     TYPE_CHECKING,
     Annotated,
@@ -29,7 +30,7 @@ from datahub.ingestion.graph.filters import (
     _get_status_filter,
 )
 from datahub.metadata.schema_classes import EntityTypeName
-from datahub.metadata.urns import DataPlatformUrn, DomainUrn
+from datahub.metadata.urns import ContainerUrn, DataPlatformUrn, DomainUrn
 
 _AndSearchFilterRule = TypedDict(
     "_AndSearchFilterRule", {"and": List[SearchFilterRule]}
@@ -81,7 +82,7 @@ class _EntityTypeFilter(_BaseFilter):
     ENTITY_TYPE_FIELD: ClassVar[str] = "_entityType"
 
     entity_type: List[str] = pydantic.Field(
-        description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
+        description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', 'dataProduct', etc.",
     )
 
     def _build_rule(self) -> SearchFilterRule:
@@ -174,6 +175,39 @@ class _DomainFilter(_BaseFilter):
         return [{"and": [self._build_rule()]}]
 
 
+class _ContainerFilter(_BaseFilter):
+    container: List[str]
+    direct_descendants_only: bool = pydantic.Field(
+        default=False,
+        description="If true, only entities that are direct descendants of the container will be returned.",
+    )
+
+    @pydantic.validator("container", each_item=True)
+    def validate_container(cls, v: str) -> str:
+        return str(ContainerUrn.from_string(v))
+
+    @classmethod
+    def _field_discriminator(cls) -> str:
+        return "container"
+
+    def _build_rule(self) -> SearchFilterRule:
+        if self.direct_descendants_only:
+            return SearchFilterRule(
+                field="container",
+                condition="EQUAL",
+                values=self.container,
+            )
+        else:
+            return SearchFilterRule(
+                field="browsePathV2",
+                condition="CONTAIN",
+                values=self.container,
+            )
+
+    def compile(self) -> _OrFilters:
+        return [{"and": [self._build_rule()]}]
+
+
 class _EnvFilter(_BaseFilter):
     # Note that not all entity types have an env (e.g. dashboards / charts).
     # If the env filter is specified, these will be excluded.
@@ -342,6 +376,8 @@ def _filter_discriminator(v: Any) -> Optional[str]:
     keys = list(v.keys())
     if len(keys) == 1:
         return keys[0]
+    elif set(keys).issuperset({"container"}):
+        return _ContainerFilter._field_discriminator()
     elif set(keys).issuperset({"field", "condition"}):
         return _CustomCondition._field_discriminator()
 
@@ -360,6 +396,7 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
         _StatusFilter,
         _PlatformFilter,
         _DomainFilter,
+        _ContainerFilter,
         _EnvFilter,
         _CustomCondition,
     ]
@@ -370,25 +407,45 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
 else:
     from pydantic import Discriminator, Tag
 
+    def _parse_json_from_string(value: Any) -> Any:
+        if isinstance(value, str):
+            try:
+                return json.loads(value)
+            except json.JSONDecodeError:
+                return value
+        else:
+            return value
+
     # TODO: Once we're fully on pydantic 2, we can use a RootModel here.
     # That way we'd be able to attach methods to the Filter type.
     # e.g. replace load_filters(...) with Filter.load(...)
     Filter = Annotated[
-        Union[
-            Annotated[_And, Tag(_And._field_discriminator())],
-            Annotated[_Or, Tag(_Or._field_discriminator())],
-            Annotated[_Not, Tag(_Not._field_discriminator())],
-            Annotated[_EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())],
-            Annotated[
-                _EntitySubtypeFilter, Tag(_EntitySubtypeFilter._field_discriminator())
+        Annotated[
+            Union[
+                Annotated[_And, Tag(_And._field_discriminator())],
+                Annotated[_Or, Tag(_Or._field_discriminator())],
+                Annotated[_Not, Tag(_Not._field_discriminator())],
+                Annotated[
+                    _EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())
+                ],
+                Annotated[
+                    _EntitySubtypeFilter,
+                    Tag(_EntitySubtypeFilter._field_discriminator()),
+                ],
+                Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
+                Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
+                Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
+                Annotated[
+                    _ContainerFilter, Tag(_ContainerFilter._field_discriminator())
+                ],
+                Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
+                Annotated[
+                    _CustomCondition, Tag(_CustomCondition._field_discriminator())
+                ],
             ],
-        Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
-        Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
-        Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
-        Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
-        Annotated[_CustomCondition, Tag(_CustomCondition._field_discriminator())],
+            Discriminator(_filter_discriminator),
         ],
-        Discriminator(_filter_discriminator),
+        pydantic.BeforeValidator(_parse_json_from_string),
     ]
 
     # Required to resolve forward references to "Filter"
@@ -468,6 +525,18 @@ class FilterDsl:
     def domain(domain: Union[str, Sequence[str]], /) -> _DomainFilter:
         return _DomainFilter(domain=[domain] if isinstance(domain, str) else domain)
 
+    @staticmethod
+    def container(
+        container: Union[str, Sequence[str]],
+        /,
+        *,
+        direct_descendants_only: bool = False,
+    ) -> _ContainerFilter:
+        return _ContainerFilter(
+            container=[container] if isinstance(container, str) else container,
+            direct_descendants_only=direct_descendants_only,
+        )
+
     @staticmethod
     def env(env: Union[str, Sequence[str]], /) -> _EnvFilter:
        return _EnvFilter(env=[env] if isinstance(env, str) else env)
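
A minimal sketch of the new container filter in use (FilterDsl is defined in this module; the URN value is illustrative):

from datahub.sdk.search_filters import FilterDsl as F

# Match entities anywhere under the container (compiles to browsePathV2 CONTAIN):
anywhere = F.container("urn:li:container:abc123")

# Match only direct children of the container (compiles to container EQUAL):
direct = F.container("urn:li:container:abc123", direct_descendants_only=True)

On the pydantic v2 code path, the new pydantic.BeforeValidator additionally allows a filter to be supplied as a JSON string such as '{"container": ["urn:li:container:abc123"]}', which is decoded before the discriminator runs.
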
datahub/sql_parsing/sql_parsing_aggregator.py CHANGED
@@ -49,6 +49,7 @@ from datahub.sql_parsing.sqlglot_lineage import (
     sqlglot_lineage,
 )
 from datahub.sql_parsing.sqlglot_utils import (
+    DialectOrStr,
     _parse_statement,
     get_query_fingerprint,
     try_format_query,
@@ -109,6 +110,7 @@ class ObservedQuery:
     default_schema: Optional[str] = None
     query_hash: Optional[str] = None
     usage_multiplier: int = 1
+    override_dialect: Optional[DialectOrStr] = None
 
     # Use this to store additional key-value information about the query for debugging.
     extra_info: Optional[dict] = None
@@ -190,6 +192,7 @@ class QueryMetadata:
             source=models.QuerySourceClass.SYSTEM,
             created=self.make_created_audit_stamp(),
             lastModified=self.make_last_modified_audit_stamp(),
+            origin=self.origin.urn() if self.origin else None,
         )
 
 
@@ -833,6 +836,7 @@ class SqlParsingAggregator(Closeable):
             session_id=session_id,
             timestamp=observed.timestamp,
             user=observed.user,
+            override_dialect=observed.override_dialect,
         )
         if parsed.debug_info.error:
             self.report.observed_query_parse_failures.append(
@@ -1167,6 +1171,7 @@
         session_id: str = _MISSING_SESSION_ID,
         timestamp: Optional[datetime] = None,
         user: Optional[Union[CorpUserUrn, CorpGroupUrn]] = None,
+        override_dialect: Optional[DialectOrStr] = None,
     ) -> SqlParsingResult:
         with self.report.sql_parsing_timer:
             parsed = sqlglot_lineage(
@@ -1174,6 +1179,7 @@
                 schema_resolver=schema_resolver,
                 default_db=default_db,
                 default_schema=default_schema,
+                override_dialect=override_dialect,
             )
         self.report.num_sql_parsed += 1
 
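A minimal sketch of routing the new override_dialect through an ObservedQuery (the query text is illustrative, and the aggregator instance plus its add_observed_query entry point are assumed rather than shown in this diff):

from datahub.sql_parsing.sql_parsing_aggregator import ObservedQuery

observed = ObservedQuery(
    query="SELECT * FROM orders WHERE amount > 100",
    default_db="analytics",
    # Force a specific sqlglot dialect instead of inferring it from the platform:
    override_dialect="snowflake",
)
aggregator.add_observed_query(observed)  # assumes an existing SqlParsingAggregator
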
datahub/telemetry/telemetry.py CHANGED
@@ -272,7 +272,10 @@ class Telemetry:
         if self.sentry_enabled:
             import sentry_sdk
 
-            sentry_sdk.set_tags(properties)
+            # Note: once we're on sentry-sdk 2.1.0+, we can use sentry_sdk.set_tags(properties)
+            # See https://github.com/getsentry/sentry-python/commit/6c960d752c7c7aff3fd7469d2e9ad98f19663aa8
+            for key, value in properties.items():
+                sentry_sdk.set_tag(key, value)
 
     def init_capture_exception(self) -> None:
         if self.sentry_enabled:
datahub/upgrade/upgrade.py CHANGED
@@ -352,9 +352,11 @@
         if version_stats.client.latest
         else None
     )
-    client_server_compat = is_client_server_compatible(
-        version_stats.client.current, version_stats.server.current
-    )
+    client_server_compat = 0
+    if version_stats.server.current_server_type != "cloud":
+        client_server_compat = is_client_server_compatible(
+            version_stats.client.current, version_stats.server.current
+        )
 
     if latest_release_date and current_release_date:
         assert version_stats.client.latest