acryl-datahub 1.2.0.2rc2__py3-none-any.whl → 1.2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. See the registry's advisory page for more details.

Files changed (51)
  1. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/METADATA +2511 -2509
  2. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/RECORD +51 -43
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataset/dataset.py +13 -1
  5. datahub/emitter/rest_emitter.py +18 -5
  6. datahub/ingestion/autogenerated/capability_summary.json +97 -6
  7. datahub/ingestion/graph/client.py +19 -3
  8. datahub/ingestion/sink/datahub_rest.py +2 -0
  9. datahub/ingestion/source/aws/glue.py +8 -0
  10. datahub/ingestion/source/cassandra/cassandra.py +5 -7
  11. datahub/ingestion/source/common/subtypes.py +2 -0
  12. datahub/ingestion/source/datahub/datahub_source.py +3 -0
  13. datahub/ingestion/source/dbt/dbt_common.py +10 -0
  14. datahub/ingestion/source/delta_lake/source.py +1 -0
  15. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  16. datahub/ingestion/source/grafana/field_utils.py +307 -0
  17. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  18. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  19. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  20. datahub/ingestion/source/grafana/lineage.py +202 -0
  21. datahub/ingestion/source/grafana/models.py +120 -0
  22. datahub/ingestion/source/grafana/report.py +91 -0
  23. datahub/ingestion/source/grafana/types.py +16 -0
  24. datahub/ingestion/source/hex/hex.py +8 -0
  25. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  26. datahub/ingestion/source/looker/looker_source.py +9 -0
  27. datahub/ingestion/source/looker/lookml_source.py +8 -0
  28. datahub/ingestion/source/mongodb.py +11 -1
  29. datahub/ingestion/source/redshift/redshift.py +8 -1
  30. datahub/ingestion/source/s3/source.py +9 -1
  31. datahub/ingestion/source/sql/athena.py +8 -2
  32. datahub/ingestion/source/sql/athena_properties_extractor.py +2 -2
  33. datahub/ingestion/source/sql/clickhouse.py +9 -0
  34. datahub/ingestion/source/sql/vertica.py +3 -0
  35. datahub/ingestion/source/sql_queries.py +88 -46
  36. datahub/ingestion/source/unity/proxy.py +112 -22
  37. datahub/ingestion/source/unity/source.py +7 -10
  38. datahub/metadata/_internal_schema_classes.py +18 -3
  39. datahub/metadata/schema.avsc +19 -1
  40. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +10 -1
  41. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  42. datahub/metadata/schemas/MetadataChangeEvent.avsc +9 -0
  43. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  44. datahub/sdk/dataset.py +44 -0
  45. datahub/sdk/search_filters.py +34 -14
  46. datahub/sql_parsing/sql_parsing_aggregator.py +5 -0
  47. datahub/telemetry/telemetry.py +4 -1
  48. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/WHEEL +0 -0
  49. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/entry_points.txt +0 -0
  50. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/licenses/LICENSE +0 -0
  51. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import abc
4
+ import json
4
5
  from typing import (
5
6
  TYPE_CHECKING,
6
7
  Annotated,
@@ -406,26 +407,45 @@ if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
406
407
  else:
407
408
  from pydantic import Discriminator, Tag
408
409
 
410
+ def _parse_json_from_string(value: Any) -> Any:
411
+ if isinstance(value, str):
412
+ try:
413
+ return json.loads(value)
414
+ except json.JSONDecodeError:
415
+ return value
416
+ else:
417
+ return value
418
+
409
419
  # TODO: Once we're fully on pydantic 2, we can use a RootModel here.
410
420
  # That way we'd be able to attach methods to the Filter type.
411
421
  # e.g. replace load_filters(...) with Filter.load(...)
412
422
  Filter = Annotated[
413
- Union[
414
- Annotated[_And, Tag(_And._field_discriminator())],
415
- Annotated[_Or, Tag(_Or._field_discriminator())],
416
- Annotated[_Not, Tag(_Not._field_discriminator())],
417
- Annotated[_EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())],
418
- Annotated[
419
- _EntitySubtypeFilter, Tag(_EntitySubtypeFilter._field_discriminator())
423
+ Annotated[
424
+ Union[
425
+ Annotated[_And, Tag(_And._field_discriminator())],
426
+ Annotated[_Or, Tag(_Or._field_discriminator())],
427
+ Annotated[_Not, Tag(_Not._field_discriminator())],
428
+ Annotated[
429
+ _EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())
430
+ ],
431
+ Annotated[
432
+ _EntitySubtypeFilter,
433
+ Tag(_EntitySubtypeFilter._field_discriminator()),
434
+ ],
435
+ Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
436
+ Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
437
+ Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
438
+ Annotated[
439
+ _ContainerFilter, Tag(_ContainerFilter._field_discriminator())
440
+ ],
441
+ Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
442
+ Annotated[
443
+ _CustomCondition, Tag(_CustomCondition._field_discriminator())
444
+ ],
420
445
  ],
421
- Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
422
- Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
423
- Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
424
- Annotated[_ContainerFilter, Tag(_ContainerFilter._field_discriminator())],
425
- Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
426
- Annotated[_CustomCondition, Tag(_CustomCondition._field_discriminator())],
446
+ Discriminator(_filter_discriminator),
427
447
  ],
428
- Discriminator(_filter_discriminator),
448
+ pydantic.BeforeValidator(_parse_json_from_string),
429
449
  ]
430
450
 
431
451
  # Required to resolve forward references to "Filter"
@@ -49,6 +49,7 @@ from datahub.sql_parsing.sqlglot_lineage import (
49
49
  sqlglot_lineage,
50
50
  )
51
51
  from datahub.sql_parsing.sqlglot_utils import (
52
+ DialectOrStr,
52
53
  _parse_statement,
53
54
  get_query_fingerprint,
54
55
  try_format_query,
@@ -109,6 +110,7 @@ class ObservedQuery:
109
110
  default_schema: Optional[str] = None
110
111
  query_hash: Optional[str] = None
111
112
  usage_multiplier: int = 1
113
+ override_dialect: Optional[DialectOrStr] = None
112
114
 
113
115
  # Use this to store additional key-value information about the query for debugging.
114
116
  extra_info: Optional[dict] = None
@@ -834,6 +836,7 @@ class SqlParsingAggregator(Closeable):
834
836
  session_id=session_id,
835
837
  timestamp=observed.timestamp,
836
838
  user=observed.user,
839
+ override_dialect=observed.override_dialect,
837
840
  )
838
841
  if parsed.debug_info.error:
839
842
  self.report.observed_query_parse_failures.append(
@@ -1168,6 +1171,7 @@ class SqlParsingAggregator(Closeable):
1168
1171
  session_id: str = _MISSING_SESSION_ID,
1169
1172
  timestamp: Optional[datetime] = None,
1170
1173
  user: Optional[Union[CorpUserUrn, CorpGroupUrn]] = None,
1174
+ override_dialect: Optional[DialectOrStr] = None,
1171
1175
  ) -> SqlParsingResult:
1172
1176
  with self.report.sql_parsing_timer:
1173
1177
  parsed = sqlglot_lineage(
@@ -1175,6 +1179,7 @@ class SqlParsingAggregator(Closeable):
1175
1179
  schema_resolver=schema_resolver,
1176
1180
  default_db=default_db,
1177
1181
  default_schema=default_schema,
1182
+ override_dialect=override_dialect,
1178
1183
  )
1179
1184
  self.report.num_sql_parsed += 1
1180
1185
 
@@ -272,7 +272,10 @@ class Telemetry:
272
272
  if self.sentry_enabled:
273
273
  import sentry_sdk
274
274
 
275
- sentry_sdk.set_tags(properties)
275
+ # Note: once we're on sentry-sdk 2.1.0+, we can use sentry_sdk.set_tags(properties)
276
+ # See https://github.com/getsentry/sentry-python/commit/6c960d752c7c7aff3fd7469d2e9ad98f19663aa8
277
+ for key, value in properties.items():
278
+ sentry_sdk.set_tag(key, value)
276
279
 
277
280
  def init_capture_exception(self) -> None:
278
281
  if self.sentry_enabled: