acryl-datahub 1.2.0.10rc2__py3-none-any.whl → 1.2.0.10rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; consult the package registry's advisory page for details.

Files changed (93)
  1. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/METADATA +2525 -2609
  2. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/RECORD +93 -93
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/assertion/assertion.py +1 -1
  5. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  6. datahub/api/entities/dataproduct/dataproduct.py +6 -3
  7. datahub/api/entities/dataset/dataset.py +9 -18
  8. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  9. datahub/api/graphql/operation.py +10 -6
  10. datahub/cli/docker_check.py +2 -2
  11. datahub/configuration/common.py +29 -1
  12. datahub/configuration/connection_resolver.py +5 -2
  13. datahub/configuration/import_resolver.py +7 -4
  14. datahub/configuration/pydantic_migration_helpers.py +0 -9
  15. datahub/configuration/source_common.py +3 -2
  16. datahub/configuration/validate_field_deprecation.py +5 -2
  17. datahub/configuration/validate_field_removal.py +5 -2
  18. datahub/configuration/validate_field_rename.py +6 -5
  19. datahub/configuration/validate_multiline_string.py +5 -2
  20. datahub/ingestion/run/pipeline_config.py +2 -2
  21. datahub/ingestion/source/azure/azure_common.py +1 -1
  22. datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
  23. datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
  24. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  25. datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
  26. datahub/ingestion/source/datahub/config.py +8 -9
  27. datahub/ingestion/source/delta_lake/config.py +1 -1
  28. datahub/ingestion/source/dremio/dremio_config.py +3 -4
  29. datahub/ingestion/source/feast.py +8 -10
  30. datahub/ingestion/source/fivetran/config.py +1 -1
  31. datahub/ingestion/source/ge_profiling_config.py +26 -22
  32. datahub/ingestion/source/grafana/grafana_config.py +2 -2
  33. datahub/ingestion/source/grafana/models.py +12 -14
  34. datahub/ingestion/source/hex/hex.py +6 -1
  35. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  36. datahub/ingestion/source/kafka_connect/common.py +2 -2
  37. datahub/ingestion/source/looker/looker_common.py +55 -75
  38. datahub/ingestion/source/looker/looker_config.py +15 -4
  39. datahub/ingestion/source/looker/looker_source.py +445 -548
  40. datahub/ingestion/source/looker/lookml_config.py +1 -1
  41. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  42. datahub/ingestion/source/metadata/lineage.py +1 -1
  43. datahub/ingestion/source/mode.py +13 -5
  44. datahub/ingestion/source/nifi.py +1 -1
  45. datahub/ingestion/source/powerbi/config.py +14 -21
  46. datahub/ingestion/source/preset.py +1 -1
  47. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  48. datahub/ingestion/source/redshift/config.py +6 -3
  49. datahub/ingestion/source/salesforce.py +13 -9
  50. datahub/ingestion/source/schema/json_schema.py +14 -14
  51. datahub/ingestion/source/sigma/data_classes.py +3 -0
  52. datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
  53. datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
  54. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
  55. datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
  56. datahub/ingestion/source/sql/athena.py +2 -1
  57. datahub/ingestion/source/sql/clickhouse.py +12 -7
  58. datahub/ingestion/source/sql/cockroachdb.py +5 -3
  59. datahub/ingestion/source/sql/druid.py +2 -2
  60. datahub/ingestion/source/sql/hive.py +4 -3
  61. datahub/ingestion/source/sql/hive_metastore.py +7 -9
  62. datahub/ingestion/source/sql/mssql/source.py +2 -2
  63. datahub/ingestion/source/sql/mysql.py +2 -2
  64. datahub/ingestion/source/sql/oracle.py +3 -3
  65. datahub/ingestion/source/sql/presto.py +2 -1
  66. datahub/ingestion/source/sql/teradata.py +4 -4
  67. datahub/ingestion/source/sql/trino.py +2 -1
  68. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  69. datahub/ingestion/source/sql/vertica.py +1 -1
  70. datahub/ingestion/source/sql_queries.py +6 -6
  71. datahub/ingestion/source/state/checkpoint.py +5 -1
  72. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  73. datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
  74. datahub/ingestion/source/superset.py +29 -4
  75. datahub/ingestion/source/tableau/tableau.py +65 -11
  76. datahub/ingestion/source/tableau/tableau_common.py +5 -0
  77. datahub/ingestion/source/tableau/tableau_constant.py +1 -0
  78. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  79. datahub/ingestion/source/unity/config.py +7 -3
  80. datahub/ingestion/source/usage/usage_common.py +3 -3
  81. datahub/ingestion/source_config/pulsar.py +3 -1
  82. datahub/metadata/_internal_schema_classes.py +45 -1
  83. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  84. datahub/metadata/schema.avsc +24 -1
  85. datahub/metadata/schemas/InstitutionalMemory.avsc +22 -0
  86. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
  87. datahub/metadata/schemas/MetadataChangeEvent.avsc +22 -0
  88. datahub/sdk/dashboard.py +0 -2
  89. datahub/sdk/search_filters.py +1 -7
  90. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/WHEEL +0 -0
  91. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/entry_points.txt +0 -0
  92. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/licenses/LICENSE +0 -0
  93. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
28
28
  User,
29
29
  WriteQuery,
30
30
  )
31
- from pydantic.class_validators import validator
31
+ from pydantic import validator
32
32
 
33
33
  import datahub.emitter.mce_builder as builder
34
34
  from datahub.api.entities.platformresource.platform_resource import (
@@ -36,7 +36,7 @@ from datahub.api.entities.platformresource.platform_resource import (
36
36
  PlatformResourceKey,
37
37
  )
38
38
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
39
- from datahub.emitter.mcp_builder import ContainerKey, create_embed_mcp
39
+ from datahub.emitter.mcp_builder import ContainerKey
40
40
  from datahub.ingestion.api.report import Report
41
41
  from datahub.ingestion.api.source import SourceReport
42
42
  from datahub.ingestion.source.common.subtypes import DatasetSubTypes
@@ -72,7 +72,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
72
72
  UpstreamClass,
73
73
  UpstreamLineage,
74
74
  )
75
- from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
76
75
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
77
76
  from datahub.metadata.com.linkedin.pegasus2avro.schema import (
78
77
  ArrayTypeClass,
@@ -90,21 +89,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
90
89
  )
91
90
  from datahub.metadata.schema_classes import (
92
91
  BrowsePathEntryClass,
93
- BrowsePathsClass,
94
92
  BrowsePathsV2Class,
95
- ContainerClass,
96
- DatasetPropertiesClass,
93
+ EmbedClass,
97
94
  EnumTypeClass,
98
95
  FineGrainedLineageClass,
99
96
  GlobalTagsClass,
100
97
  SchemaMetadataClass,
101
- StatusClass,
102
- SubTypesClass,
103
98
  TagAssociationClass,
104
99
  TagPropertiesClass,
105
100
  TagSnapshotClass,
106
101
  )
107
102
  from datahub.metadata.urns import TagUrn
103
+ from datahub.sdk.dataset import Dataset
108
104
  from datahub.sql_parsing.sqlglot_lineage import ColumnRef
109
105
  from datahub.utilities.lossy_collections import LossyList, LossySet
110
106
  from datahub.utilities.url_util import remove_port_from_url
@@ -1307,50 +1303,28 @@ class LookerExplore:
1307
1303
  reporter: SourceReport,
1308
1304
  base_url: str,
1309
1305
  extract_embed_urls: bool,
1310
- ) -> Optional[List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]]:
1311
- # We only generate MCE-s for explores that contain from clauses and do NOT contain joins
1312
- # All other explores (passthrough explores and joins) end in correct resolution of lineage, and don't need additional nodes in the graph.
1313
-
1314
- dataset_snapshot = DatasetSnapshot(
1315
- urn=self.get_explore_urn(config),
1316
- aspects=[], # we append to this list later on
1317
- )
1318
-
1319
- model_key = gen_model_key(config, self.model_name)
1320
- browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
1321
- container = ContainerClass(container=model_key.as_urn())
1322
- dataset_snapshot.aspects.append(browse_paths)
1323
- dataset_snapshot.aspects.append(StatusClass(removed=False))
1324
-
1325
- custom_properties = {
1326
- "project": self.project_name,
1327
- "model": self.model_name,
1328
- "looker.explore.label": self.label,
1329
- "looker.explore.name": self.name,
1330
- "looker.explore.file": self.source_file,
1331
- }
1332
- dataset_props = DatasetPropertiesClass(
1333
- name=str(self.label) if self.label else LookerUtil._display_name(self.name),
1334
- description=self.description,
1335
- customProperties={
1336
- k: str(v) for k, v in custom_properties.items() if v is not None
1337
- },
1338
- )
1339
- dataset_props.externalUrl = self._get_url(base_url)
1306
+ ) -> Dataset:
1307
+ """
1308
+ Generate a Dataset metadata event for this Looker Explore.
1340
1309
 
1341
- dataset_snapshot.aspects.append(dataset_props)
1310
+ Only generates datasets for explores that contain FROM clauses and do NOT contain joins.
1311
+ Passthrough explores and joins are handled via lineage and do not need additional nodes.
1312
+ """
1313
+ upstream_lineage = None
1342
1314
  view_name_to_urn_map: Dict[str, str] = {}
1315
+
1343
1316
  if self.upstream_views is not None:
1344
1317
  assert self.project_name is not None
1345
- upstreams = []
1318
+ upstreams: list[UpstreamClass] = []
1346
1319
  observed_lineage_ts = datetime.datetime.now(tz=datetime.timezone.utc)
1320
+
1347
1321
  for view_ref in sorted(self.upstream_views):
1348
1322
  # set file_path to ViewFieldType.UNKNOWN if file_path is not available to keep backward compatibility
1349
1323
  # if we raise error on file_path equal to None then existing test-cases will fail as mock data
1350
1324
  # doesn't have required attributes.
1351
1325
  file_path: str = (
1352
1326
  cast(str, self.upstream_views_file_path[view_ref.include])
1353
- if self.upstream_views_file_path[view_ref.include] is not None
1327
+ if self.upstream_views_file_path.get(view_ref.include) is not None
1354
1328
  else ViewFieldValue.NOT_AVAILABLE.value
1355
1329
  )
1356
1330
 
@@ -1377,7 +1351,7 @@ class LookerExplore:
1377
1351
  )
1378
1352
  view_name_to_urn_map[view_ref.include] = view_urn
1379
1353
 
1380
- fine_grained_lineages = []
1354
+ fine_grained_lineages: list[FineGrainedLineageClass] = []
1381
1355
  if config.extract_column_level_lineage:
1382
1356
  for field in self.fields or []:
1383
1357
  # Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
@@ -1418,9 +1392,11 @@ class LookerExplore:
1418
1392
  )
1419
1393
 
1420
1394
  upstream_lineage = UpstreamLineage(
1421
- upstreams=upstreams, fineGrainedLineages=fine_grained_lineages or None
1395
+ upstreams=upstreams,
1396
+ fineGrainedLineages=fine_grained_lineages or None,
1422
1397
  )
1423
- dataset_snapshot.aspects.append(upstream_lineage)
1398
+
1399
+ schema_metadata = None
1424
1400
  if self.fields is not None:
1425
1401
  schema_metadata = LookerUtil._get_schema(
1426
1402
  platform_name=config.platform_name,
@@ -1428,42 +1404,46 @@ class LookerExplore:
1428
1404
  view_fields=self.fields,
1429
1405
  reporter=reporter,
1430
1406
  )
1431
- if schema_metadata is not None:
1432
- dataset_snapshot.aspects.append(schema_metadata)
1433
-
1434
- mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
1435
- mcp = MetadataChangeProposalWrapper(
1436
- entityUrn=dataset_snapshot.urn,
1437
- aspect=SubTypesClass(typeNames=[DatasetSubTypes.LOOKER_EXPLORE]),
1438
- )
1439
1407
 
1440
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
1441
- mce,
1442
- mcp,
1443
- ]
1444
-
1445
- # Add tags
1446
- explore_tag_urns: List[TagAssociationClass] = [
1447
- TagAssociationClass(tag=TagUrn(tag).urn()) for tag in self.tags
1448
- ]
1449
- if explore_tag_urns:
1450
- dataset_snapshot.aspects.append(GlobalTagsClass(explore_tag_urns))
1408
+ extra_aspects: List[Union[GlobalTagsClass, EmbedClass]] = []
1451
1409
 
1452
- # If extracting embeds is enabled, produce an MCP for embed URL.
1410
+ explore_tag_urns: List[TagUrn] = [TagUrn(tag) for tag in self.tags]
1453
1411
  if extract_embed_urls:
1454
- embed_mcp = create_embed_mcp(
1455
- dataset_snapshot.urn, self._get_embed_url(base_url)
1456
- )
1457
- proposals.append(embed_mcp)
1412
+ extra_aspects.append(EmbedClass(renderUrl=self._get_embed_url(base_url)))
1458
1413
 
1459
- proposals.append(
1460
- MetadataChangeProposalWrapper(
1461
- entityUrn=dataset_snapshot.urn,
1462
- aspect=container,
1463
- )
1464
- )
1414
+ custom_properties: Dict[str, Optional[str]] = {
1415
+ "project": self.project_name,
1416
+ "model": self.model_name,
1417
+ "looker.explore.label": self.label,
1418
+ "looker.explore.name": self.name,
1419
+ "looker.explore.file": self.source_file,
1420
+ }
1465
1421
 
1466
- return proposals
1422
+ return Dataset(
1423
+ platform=config.platform_name,
1424
+ name=config.explore_naming_pattern.replace_variables(
1425
+ self.get_mapping(config)
1426
+ ),
1427
+ display_name=str(self.label)
1428
+ if self.label
1429
+ else LookerUtil._display_name(self.name),
1430
+ description=self.description,
1431
+ subtype=DatasetSubTypes.LOOKER_EXPLORE,
1432
+ env=config.env,
1433
+ platform_instance=config.platform_instance,
1434
+ custom_properties={
1435
+ k: str(v) for k, v in custom_properties.items() if v is not None
1436
+ },
1437
+ external_url=self._get_url(base_url),
1438
+ upstreams=upstream_lineage,
1439
+ schema=schema_metadata,
1440
+ parent_container=[
1441
+ "Explore",
1442
+ gen_model_key(config, self.model_name).as_urn(),
1443
+ ],
1444
+ tags=explore_tag_urns if explore_tag_urns else None,
1445
+ extra_aspects=extra_aspects,
1446
+ )
1467
1447
 
1468
1448
 
1469
1449
  def gen_project_key(config: LookerCommonConfig, project_name: str) -> LookMLProjectKey:
@@ -5,10 +5,14 @@ from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union, cast
5
5
 
6
6
  import pydantic
7
7
  from looker_sdk.sdk.api40.models import DBConnection
8
- from pydantic import Field, validator
8
+ from pydantic import Field, model_validator, validator
9
9
 
10
10
  from datahub.configuration import ConfigModel
11
- from datahub.configuration.common import AllowDenyPattern, ConfigurationError
11
+ from datahub.configuration.common import (
12
+ AllowDenyPattern,
13
+ ConfigurationError,
14
+ HiddenFromDocs,
15
+ )
12
16
  from datahub.configuration.source_common import (
13
17
  EnvConfigMixin,
14
18
  PlatformInstanceConfigMixin,
@@ -43,6 +47,14 @@ class NamingPattern(ConfigModel):
43
47
  assert isinstance(v, str), "pattern must be a string"
44
48
  return {"pattern": v}
45
49
 
50
+ @model_validator(mode="before")
51
+ @classmethod
52
+ def pydantic_v2_accept_raw_pattern(cls, v):
53
+ # Pydantic v2 compatibility: handle string input by converting to dict
54
+ if isinstance(v, str):
55
+ return {"pattern": v}
56
+ return v
57
+
46
58
  @classmethod
47
59
  def pydantic_validate_pattern(cls, v):
48
60
  assert isinstance(v, NamingPattern)
@@ -132,11 +144,10 @@ class LookerCommonConfig(EnvConfigMixin, PlatformInstanceConfigMixin):
132
144
  description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more "
133
145
  "discoverable. When disabled, adds this information to the description of the column.",
134
146
  )
135
- platform_name: str = Field(
147
+ platform_name: HiddenFromDocs[str] = Field(
136
148
  # TODO: This shouldn't be part of the config.
137
149
  "looker",
138
150
  description="Default platform name.",
139
- hidden_from_docs=True,
140
151
  )
141
152
  extract_column_level_lineage: bool = Field(
142
153
  True,