acryl-datahub 1.1.0.4rc2__py3-none-any.whl → 1.1.0.5__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5.dist-info}/METADATA +2528 -2530
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5.dist-info}/RECORD +156 -138
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -1
- datahub/cli/check_cli.py +65 -11
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +3 -4
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/emitter/rest_emitter.py +41 -8
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +47 -45
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/graph/client.py +73 -30
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +12 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/glue.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +49 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +7 -2
- datahub/ingestion/source/dbt/dbt_common.py +3 -1
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +94 -81
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +76 -28
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/powerbi/powerbi.py +0 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/redshift/redshift.py +17 -0
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +6 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +27 -1
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +14 -2
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -12
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/hive_metastore.py +0 -10
- datahub/ingestion/source/sql/mssql/source.py +24 -15
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/sql_common.py +11 -0
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +57 -2
- datahub/ingestion/source/tableau/tableau.py +57 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/proxy.py +4 -3
- datahub/ingestion/source/unity/source.py +56 -30
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -0
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1253 -536
- datahub/metadata/_urns/urn_defs.py +1797 -1685
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +16614 -16538
- datahub/metadata/schemas/ContainerProperties.avsc +2 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +2 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataJobInfo.avsc +2 -0
- datahub/metadata/schemas/DataProcessKey.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +4 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +2 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -0
- datahub/metadata/schemas/MLModelKey.avsc +2 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +2 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/sdk/datajob.py +39 -15
- datahub/sdk/lineage_client.py +2 -0
- datahub/sdk/main_client.py +14 -2
- datahub/sdk/search_client.py +4 -3
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +40 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.4rc2.dist-info → acryl_datahub-1.1.0.5.dist-info}/top_level.txt +0 -0
|
@@ -52,7 +52,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import Dataset
|
|
|
52
52
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
53
53
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
|
|
54
54
|
from datahub.metadata.schema_classes import (
|
|
55
|
-
ChangeTypeClass,
|
|
56
55
|
DatasetPropertiesClass,
|
|
57
56
|
SubTypesClass,
|
|
58
57
|
ViewPropertiesClass,
|
|
@@ -601,10 +600,7 @@ class HiveMetastoreSource(SQLAlchemySource):
|
|
|
601
600
|
yield dpi_aspect
|
|
602
601
|
|
|
603
602
|
yield MetadataChangeProposalWrapper(
|
|
604
|
-
entityType="dataset",
|
|
605
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
606
603
|
entityUrn=dataset_urn,
|
|
607
|
-
aspectName="subTypes",
|
|
608
604
|
aspect=SubTypesClass(typeNames=[self.table_subtype]),
|
|
609
605
|
).as_workunit()
|
|
610
606
|
|
|
@@ -810,10 +806,7 @@ class HiveMetastoreSource(SQLAlchemySource):
|
|
|
810
806
|
|
|
811
807
|
# Add views subtype
|
|
812
808
|
yield MetadataChangeProposalWrapper(
|
|
813
|
-
entityType="dataset",
|
|
814
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
815
809
|
entityUrn=dataset_urn,
|
|
816
|
-
aspectName="subTypes",
|
|
817
810
|
aspect=SubTypesClass(typeNames=[self.view_subtype]),
|
|
818
811
|
).as_workunit()
|
|
819
812
|
|
|
@@ -824,10 +817,7 @@ class HiveMetastoreSource(SQLAlchemySource):
|
|
|
824
817
|
viewLogic=dataset.view_definition if dataset.view_definition else "",
|
|
825
818
|
)
|
|
826
819
|
yield MetadataChangeProposalWrapper(
|
|
827
|
-
entityType="dataset",
|
|
828
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
829
820
|
entityUrn=dataset_urn,
|
|
830
|
-
aspectName="viewProperties",
|
|
831
821
|
aspect=view_properties_aspect,
|
|
832
822
|
).as_workunit()
|
|
833
823
|
|
|
@@ -27,6 +27,7 @@ from datahub.ingestion.api.decorators import (
|
|
|
27
27
|
from datahub.ingestion.api.source import StructuredLogLevel
|
|
28
28
|
from datahub.ingestion.api.source_helpers import auto_workunit
|
|
29
29
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
30
|
+
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
|
30
31
|
from datahub.ingestion.source.sql.mssql.job_models import (
|
|
31
32
|
JobStep,
|
|
32
33
|
MSSQLDataFlow,
|
|
@@ -177,10 +178,18 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
|
|
|
177
178
|
@capability(
|
|
178
179
|
SourceCapability.LINEAGE_COARSE,
|
|
179
180
|
"Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_lineage`",
|
|
181
|
+
subtype_modifier=[
|
|
182
|
+
SourceCapabilityModifier.STORED_PROCEDURE,
|
|
183
|
+
SourceCapabilityModifier.VIEW,
|
|
184
|
+
],
|
|
180
185
|
)
|
|
181
186
|
@capability(
|
|
182
187
|
SourceCapability.LINEAGE_FINE,
|
|
183
188
|
"Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_column_lineage`",
|
|
189
|
+
subtype_modifier=[
|
|
190
|
+
SourceCapabilityModifier.STORED_PROCEDURE,
|
|
191
|
+
SourceCapabilityModifier.VIEW,
|
|
192
|
+
],
|
|
184
193
|
)
|
|
185
194
|
class SQLServerSource(SQLAlchemySource):
|
|
186
195
|
"""
|
|
@@ -936,25 +945,25 @@ class SQLServerSource(SQLAlchemySource):
|
|
|
936
945
|
url = self.config.get_sql_alchemy_url()
|
|
937
946
|
logger.debug(f"sql_alchemy_url={url}")
|
|
938
947
|
engine = create_engine(url, **self.config.options)
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
948
|
+
|
|
949
|
+
if self.config.database and self.config.database != "":
|
|
950
|
+
inspector = inspect(engine)
|
|
951
|
+
yield inspector
|
|
952
|
+
else:
|
|
953
|
+
with engine.begin() as conn:
|
|
944
954
|
databases = conn.execute(
|
|
945
955
|
"SELECT name FROM master.sys.databases WHERE name NOT IN \
|
|
946
956
|
('master', 'model', 'msdb', 'tempdb', 'Resource', \
|
|
947
957
|
'distribution' , 'reportserver', 'reportservertempdb'); "
|
|
948
|
-
)
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
yield inspector
|
|
958
|
+
).fetchall()
|
|
959
|
+
|
|
960
|
+
for db in databases:
|
|
961
|
+
if self.config.database_pattern.allowed(db["name"]):
|
|
962
|
+
url = self.config.get_sql_alchemy_url(current_db=db["name"])
|
|
963
|
+
engine = create_engine(url, **self.config.options)
|
|
964
|
+
inspector = inspect(engine)
|
|
965
|
+
self.current_database = db["name"]
|
|
966
|
+
yield inspector
|
|
958
967
|
|
|
959
968
|
def get_identifier(
|
|
960
969
|
self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any
|
|
@@ -441,7 +441,7 @@ class OracleInspectorObjectWrapper:
|
|
|
441
441
|
"\nac.constraint_name,"
|
|
442
442
|
"\nac.constraint_type,"
|
|
443
443
|
"\nacc.column_name AS local_column,"
|
|
444
|
-
"\nac.r_table_name AS remote_table,"
|
|
444
|
+
"\nac.table_name AS remote_table,"
|
|
445
445
|
"\nrcc.column_name AS remote_column,"
|
|
446
446
|
"\nac.r_owner AS remote_owner,"
|
|
447
447
|
"\nacc.position AS loc_pos,"
|
|
@@ -54,6 +54,7 @@ from datahub.ingestion.source.common.data_reader import DataReader
|
|
|
54
54
|
from datahub.ingestion.source.common.subtypes import (
|
|
55
55
|
DatasetContainerSubTypes,
|
|
56
56
|
DatasetSubTypes,
|
|
57
|
+
SourceCapabilityModifier,
|
|
57
58
|
)
|
|
58
59
|
from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
|
|
59
60
|
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
@@ -291,6 +292,10 @@ class ProfileMetadata:
|
|
|
291
292
|
SourceCapability.CONTAINERS,
|
|
292
293
|
"Enabled by default",
|
|
293
294
|
supported=True,
|
|
295
|
+
subtype_modifier=[
|
|
296
|
+
SourceCapabilityModifier.DATABASE,
|
|
297
|
+
SourceCapabilityModifier.SCHEMA,
|
|
298
|
+
],
|
|
294
299
|
)
|
|
295
300
|
@capability(
|
|
296
301
|
SourceCapability.DESCRIPTIONS,
|
|
@@ -305,10 +310,12 @@ class ProfileMetadata:
|
|
|
305
310
|
@capability(
|
|
306
311
|
SourceCapability.LINEAGE_COARSE,
|
|
307
312
|
"Enabled by default to get lineage for views via `include_view_lineage`",
|
|
313
|
+
subtype_modifier=[SourceCapabilityModifier.VIEW],
|
|
308
314
|
)
|
|
309
315
|
@capability(
|
|
310
316
|
SourceCapability.LINEAGE_FINE,
|
|
311
317
|
"Enabled by default to get lineage for views via `include_view_column_lineage`",
|
|
318
|
+
subtype_modifier=[SourceCapabilityModifier.VIEW],
|
|
312
319
|
)
|
|
313
320
|
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
314
321
|
@capability(
|
|
@@ -586,6 +593,10 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
586
593
|
)
|
|
587
594
|
|
|
588
595
|
# Generate workunit for aggregated SQL parsing results
|
|
596
|
+
yield from self._generate_aggregator_workunits()
|
|
597
|
+
|
|
598
|
+
def _generate_aggregator_workunits(self) -> Iterable[MetadataWorkUnit]:
|
|
599
|
+
"""Generate work units from SQL parsing aggregator. Can be overridden by subclasses."""
|
|
589
600
|
for mcp in self.aggregator.gen_metadata():
|
|
590
601
|
yield mcp.as_workunit()
|
|
591
602
|
|
|
@@ -57,10 +57,11 @@ class GenericProfiler:
|
|
|
57
57
|
platform: Optional[str] = None,
|
|
58
58
|
profiler_args: Optional[Dict] = None,
|
|
59
59
|
) -> Iterable[MetadataWorkUnit]:
|
|
60
|
+
# We don't run ge profiling queries if table profiling is enabled or if the row count is 0.
|
|
60
61
|
ge_profile_requests: List[GEProfilerRequest] = [
|
|
61
62
|
cast(GEProfilerRequest, request)
|
|
62
63
|
for request in requests
|
|
63
|
-
if not request.profile_table_level_only
|
|
64
|
+
if not request.profile_table_level_only or request.table.rows_count == 0
|
|
64
65
|
]
|
|
65
66
|
table_level_profile_requests: List[TableProfilerRequest] = [
|
|
66
67
|
request for request in requests if request.profile_table_level_only
|