acryl-datahub 0.15.0.6rc3__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2552 -2523
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +204 -191
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +1 -1
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +3 -5
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +3 -3
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +6 -12
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +7 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +251 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +29 -5
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +20 -13
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/nifi.py
CHANGED
@@ -22,7 +22,9 @@ from requests_gssapi import HTTPSPNEGOAuth
 
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import AllowDenyPattern
-from datahub.configuration.source_common import
+from datahub.configuration.source_common import (
+    EnvConfigMixin,
+)
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import ContainerKey, gen_containers
 from datahub.ingestion.api.common import PipelineContext
@@ -33,9 +35,21 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import
+from datahub.ingestion.api.source import (
+    MetadataWorkUnitProcessor,
+    SourceCapability,
+    SourceReport,
+)
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import JobContainerSubTypes
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalHandler,
+    StaleEntityRemovalSourceReport,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+    StatefulIngestionConfigBase,
+    StatefulIngestionSourceBase,
+)
 from datahub.metadata.schema_classes import (
     BrowsePathEntryClass,
     BrowsePathsV2Class,
@@ -81,7 +95,7 @@ class ProcessGroupKey(ContainerKey):
     process_group_id: str
 
 
-class NifiSourceConfig(EnvConfigMixin):
+class NifiSourceConfig(StatefulIngestionConfigBase, EnvConfigMixin):
    site_url: str = Field(
        description="URL for Nifi, ending with /nifi/. e.g. https://mynifi.domain/nifi/"
    )
@@ -452,7 +466,7 @@ def get_attribute_value(attr_lst: List[dict], attr_name: str) -> Optional[str]:
 
 
 @dataclass
-class NifiSourceReport(SourceReport):
+class NifiSourceReport(StaleEntityRemovalSourceReport):
     filtered: LossyList[str] = field(default_factory=LossyList)
 
     def report_dropped(self, ent_name: str) -> None:
@@ -464,13 +478,14 @@ class NifiSourceReport(SourceReport):
 @config_class(NifiSourceConfig)
 @support_status(SupportStatus.CERTIFIED)
 @capability(SourceCapability.LINEAGE_COARSE, "Supported. See docs for limitations")
-class NifiSource(Source):
+class NifiSource(StatefulIngestionSourceBase):
     config: NifiSourceConfig
     report: NifiSourceReport
 
     def __init__(self, config: NifiSourceConfig, ctx: PipelineContext) -> None:
-        super().__init__(ctx)
+        super().__init__(config, ctx)
         self.config = config
+        self.ctx = ctx
         self.report = NifiSourceReport()
         self.session = requests.Session()
 
@@ -488,7 +503,7 @@ class NifiSource(Source):
     def get_report(self) -> SourceReport:
         return self.report
 
-    def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:
+    def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:
         """
         Update self.nifi_flow with contents of the input process group `pg_flow_dto`
         """
@@ -894,7 +909,7 @@ class NifiSource(Source):
         if not delete_response.ok:
             logger.error("failed to delete provenance ", provenance_uri)
 
-    def construct_workunits(self) -> Iterable[MetadataWorkUnit]:
+    def construct_workunits(self) -> Iterable[MetadataWorkUnit]:
         rootpg = self.nifi_flow.root_process_group
         flow_name = rootpg.name  # self.config.site_name
         flow_urn = self.make_flow_urn()
@@ -1151,6 +1166,14 @@ class NifiSource(Source):
         token_response.raise_for_status()
         self.session.headers.update({"Authorization": "Bearer " + token_response.text})
 
+    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
+        return [
+            *super().get_workunit_processors(),
+            StaleEntityRemovalHandler.create(
+                self, self.config, self.ctx
+            ).workunit_processor,
+        ]
+
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         try:
             self.authenticate()
@@ -1211,11 +1234,14 @@ class NifiSource(Source):
         job_type: str,
         description: Optional[str],
         job_properties: Optional[Dict[str, str]] = None,
-        inlets: List[str] = [],
-        outlets: List[str] = [],
-        inputJobs: List[str] = [],
+        inlets: Optional[List[str]] = None,
+        outlets: Optional[List[str]] = None,
+        inputJobs: Optional[List[str]] = None,
         status: Optional[str] = None,
     ) -> Iterable[MetadataWorkUnit]:
+        inlets = inlets or []
+        outlets = outlets or []
+        inputJobs = inputJobs or []
         logger.debug(f"Begining construction of job workunit for {job_urn}")
        if job_properties:
            job_properties = {k: v for k, v in job_properties.items() if v is not None}
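The NiFi changes above follow a pattern repeated throughout this release (see also the PowerBI Report Server and Redash diffs below): the source config gains StatefulIngestionConfigBase, the source class switches to StatefulIngestionSourceBase, and get_workunit_processors wires in StaleEntityRemovalHandler. A minimal sketch of how a user would turn this on, assuming the standard Pipeline API; the site URL, pipeline name, and sink address are placeholders:

    # Hedged sketch (not from the diff): enabling stale-entity removal for the NiFi
    # source now that NifiSourceConfig extends StatefulIngestionConfigBase.
    from datahub.ingestion.run.pipeline import Pipeline

    pipeline = Pipeline.create(
        {
            "pipeline_name": "nifi_prod",  # stateful ingestion keys its state by this name
            "source": {
                "type": "nifi",
                "config": {
                    "site_url": "https://mynifi.domain/nifi/",
                    # New for this source in 1.0.0: soft-delete entities that
                    # disappeared since the last successful run.
                    "stateful_ingestion": {"enabled": True, "remove_stale_metadata": True},
                },
            },
            "sink": {"type": "datahub-rest", "config": {"server": "http://localhost:8080"}},
        }
    )
    pipeline.run()
    pipeline.raise_from_status()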
datahub/ingestion/source/openapi.py
CHANGED

@@ -270,7 +270,7 @@ class APISource(Source, ABC):
         mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
         return ApiWorkUnit(id=dataset_name, mce=mce)
 
-    def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:
+    def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:
         config = self.config
 
         sw_dict = self.config.get_swagger()
datahub/ingestion/source/openapi_parser.py
CHANGED

@@ -12,7 +12,11 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     SchemaField,
     SchemaMetadata,
 )
-from datahub.metadata.schema_classes import
+from datahub.metadata.schema_classes import (
+    RecordTypeClass,
+    SchemaFieldDataTypeClass,
+    StringTypeClass,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -20,9 +24,12 @@ logger = logging.getLogger(__name__)
 def flatten(d: dict, prefix: str = "") -> Generator:
     for k, v in d.items():
         if isinstance(v, dict):
+            # First yield the parent field
+            yield f"{prefix}.{k}".strip(".")
+            # Then yield all nested fields
             yield from flatten(v, f"{prefix}.{k}")
         else:
-            yield f"{prefix}
+            yield f"{prefix}.{k}".strip(".")  # Use dot instead of hyphen
 
 
 def flatten2list(d: dict) -> list:
@@ -34,7 +41,7 @@ def flatten2list(d: dict) -> list:
         "anotherone": {"third_a": {"last": 3}}
     }
 
-
+    yields:
 
     ["first.second_a",
      "first.second_b",
@@ -43,7 +50,7 @@ def flatten2list(d: dict) -> list:
     ]
     """
     fl_l = list(flatten(d))
-    return
+    return fl_l
 
 
 def request_call(
@@ -111,7 +118,7 @@ def check_sw_version(sw_dict: dict) -> None:
     )
 
 
-def get_endpoints(sw_dict: dict) -> dict:
+def get_endpoints(sw_dict: dict) -> dict:
     """
     Get all the URLs, together with their description and the tags
     """
@@ -160,7 +167,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
     Try to determine if example data is defined for the endpoint, and return it
     """
     data = {}
-    if "content" in base_res.keys():
+    if "content" in base_res:
         res_cont = base_res["content"]
         if "application/json" in res_cont.keys():
             ex_field = None
@@ -181,7 +188,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
            )
     elif "text/csv" in res_cont.keys():
         data = res_cont["text/csv"]["schema"]
-    elif "examples" in base_res.keys():
+    elif "examples" in base_res:
         data = base_res["examples"]["application/json"]
 
     return data
@@ -322,6 +329,8 @@ def extract_fields(
             return ["contains_a_string"], {"contains_a_string": dict_data[0]}
         else:
             raise ValueError("unknown format")
+    elif not dict_data:  # Handle empty dict case
+        return [], {}
     if len(dict_data) > 1:
         # the elements are directly inside the dict
         return flatten2list(dict_data), dict_data
@@ -384,16 +393,39 @@ def set_metadata(
     dataset_name: str, fields: List, platform: str = "api"
 ) -> SchemaMetadata:
     canonical_schema: List[SchemaField] = []
(the 10 removed lines of the previous implementation were not rendered in the source diff view)
+    seen_paths = set()
+
+    # Process all flattened fields
+    for field_path in fields:
+        parts = field_path.split(".")
+
+        # Add struct/object fields for each ancestor path
+        current_path: List[str] = []
+        for part in parts[:-1]:
+            ancestor_path = ".".join(current_path + [part])
+            if ancestor_path not in seen_paths:
+                struct_field = SchemaField(
+                    fieldPath=ancestor_path,
+                    nativeDataType="object",  # OpenAPI term for struct/record
+                    type=SchemaFieldDataTypeClass(type=RecordTypeClass()),
+                    description="",
+                    recursive=False,
+                )
+                canonical_schema.append(struct_field)
+                seen_paths.add(ancestor_path)
+            current_path.append(part)
+
+        # Add the leaf field if not already seen
+        if field_path not in seen_paths:
+            leaf_field = SchemaField(
+                fieldPath=field_path,
+                nativeDataType="str",  # Keeping `str` for backwards compatability, ideally this is the correct type
+                type=SchemaFieldDataTypeClass(type=StringTypeClass()),
+                description="",
+                recursive=False,
+            )
+            canonical_schema.append(leaf_field)
+            seen_paths.add(field_path)
 
     schema_metadata = SchemaMetadata(
         schemaName=dataset_name,
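The reworked flatten() above now emits a path for every intermediate object as well as for the leaves, always joined with dots, and set_metadata() then maps the intermediate paths to RecordTypeClass fields and the leaves to StringTypeClass fields. A small standalone sketch of the new traversal, reusing the example dict from the flatten2list docstring (output order assumes normal dict insertion order):

    # Sketch mirroring the new flatten() behaviour shown in the diff above.
    def flatten(d: dict, prefix: str = ""):
        for k, v in d.items():
            if isinstance(v, dict):
                yield f"{prefix}.{k}".strip(".")        # parent/struct path
                yield from flatten(v, f"{prefix}.{k}")  # nested fields
            else:
                yield f"{prefix}.{k}".strip(".")        # leaf path

    d = {
        "first": {"second_a": 3, "second_b": 4},
        "another": 2,
        "anotherone": {"third_a": {"last": 3}},
    }
    print(list(flatten(d)))
    # ['first', 'first.second_a', 'first.second_b', 'another',
    #  'anotherone', 'anotherone.third_a', 'anotherone.third_a.last']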
datahub/ingestion/source/powerbi/m_query/parser.py
CHANGED

@@ -2,7 +2,7 @@ import functools
 import importlib.resources as pkg_resource
 import logging
 import os
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 import lark
 from lark import Lark, Tree
@@ -65,8 +65,9 @@ def get_upstream_tables(
     platform_instance_resolver: AbstractDataPlatformInstanceResolver,
     ctx: PipelineContext,
     config: PowerBiDashboardSourceConfig,
-    parameters: Dict[str, str] = {},
+    parameters: Optional[Dict[str, str]] = None,
 ) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
+    parameters = parameters or {}
     if table.expression is None:
         logger.debug(f"There is no M-Query expression in table {table.full_name}")
         return []
datahub/ingestion/source/powerbi/m_query/tree_function.py
CHANGED

@@ -70,13 +70,14 @@ def get_first_rule(tree: Tree, rule: str) -> Optional[Tree]:
     return expression_tree
 
 
-def token_values(tree: Tree, parameters: Dict[str, str] = {}) -> List[str]:
+def token_values(tree: Tree, parameters: Optional[Dict[str, str]] = None) -> List[str]:
     """
     :param tree: Tree to traverse
     :param parameters: If parameters is not an empty dict, it will try to resolve identifier variable references
                        using the values in 'parameters'.
     :return: List of leaf token data
     """
+    parameters = parameters or {}
     values: List[str] = []
 
     def internal(node: Union[Tree, Token]) -> None:
datahub/ingestion/source/powerbi/powerbi.py
CHANGED

@@ -890,9 +890,7 @@ class Mapper:
                     set(user_rights) & set(self.__config.ownership.owner_criteria)
                 )
                 > 0
-            ):
-                user_mcps.extend(self.to_datahub_user(user))
-            elif self.__config.ownership.owner_criteria is None:
+            ) or self.__config.ownership.owner_criteria is None:
                 user_mcps.extend(self.to_datahub_user(user))
             else:
                 continue
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
CHANGED

@@ -380,8 +380,9 @@ class DataResolverBase(ABC):
     def itr_pages(
         self,
         endpoint: str,
-        parameter_override: Dict = {},
+        parameter_override: Optional[Dict] = None,
     ) -> Iterator[List[Dict]]:
+        parameter_override = parameter_override or {}
         params: dict = {
             "$skip": 0,
             "$top": self.TOP,
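This hunk, like the NiFi, M-Query parser, Qlik Sense, and Redash changes in this release, replaces mutable {} / [] default arguments with None plus an `or`-normalization inside the function: Python evaluates default values once at definition time, so a mutable default is shared across every call. A self-contained illustration of the pitfall and the idiom (the collect_* helpers are made up for this example):

    from typing import List, Optional

    def collect_buggy(item: str, bucket: List[str] = []) -> List[str]:
        bucket.append(item)          # mutates the single shared default list
        return bucket

    def collect_fixed(item: str, bucket: Optional[List[str]] = None) -> List[str]:
        bucket = bucket or []        # fresh list on every call (same idiom as the diff)
        bucket.append(item)
        return bucket

    print(collect_buggy("a"), collect_buggy("b"))  # ['a', 'b'] ['a', 'b'] -- state leaks
    print(collect_fixed("a"), collect_fixed("b"))  # ['a'] ['b']

One subtlety of the `or` idiom: an explicitly passed empty dict or list is also swapped for a fresh object, which is close enough to the old behaviour for these parameters.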
datahub/ingestion/source/powerbi_report_server/report_server.py
CHANGED

@@ -14,7 +14,9 @@ from requests_ntlm import HttpNtlmAuth
 
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import AllowDenyPattern
-from datahub.configuration.source_common import
+from datahub.configuration.source_common import (
+    EnvConfigMixin,
+)
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
@@ -25,7 +27,7 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.powerbi_report_server.constants import (
     API_ENDPOINTS,
@@ -39,6 +41,14 @@ from datahub.ingestion.source.powerbi_report_server.report_server_domain import
     PowerBiReport,
     Report,
 )
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalHandler,
+    StaleEntityRemovalSourceReport,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+    StatefulIngestionConfigBase,
+    StatefulIngestionSourceBase,
+)
 from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
 from datahub.metadata.schema_classes import (
     BrowsePathsClass,
@@ -58,7 +68,7 @@ from datahub.utilities.lossy_collections import LossyList
 LOGGER = logging.getLogger(__name__)
 
 
-class PowerBiReportServerAPIConfig(EnvConfigMixin):
+class PowerBiReportServerAPIConfig(StatefulIngestionConfigBase, EnvConfigMixin):
     username: str = pydantic.Field(description="Windows account username")
     password: str = pydantic.Field(description="Windows account password")
     workstation_name: str = pydantic.Field(
@@ -186,7 +196,7 @@ class PowerBiReportServerAPI:
         }
 
         reports: List[Any] = []
-        for report_type in report_types_mapping.keys():
+        for report_type in report_types_mapping:
             report_get_endpoint: str = API_ENDPOINTS[report_type]
             # Replace place holders
             report_get_endpoint_http = report_get_endpoint.format(
@@ -475,7 +485,7 @@ class Mapper:
 
 
 @dataclass
-class PowerBiReportServerDashboardSourceReport(SourceReport):
+class PowerBiReportServerDashboardSourceReport(StaleEntityRemovalSourceReport):
     scanned_report: int = 0
     filtered_reports: LossyList[str] = dataclass_field(default_factory=LossyList)
 
@@ -490,7 +500,7 @@ class PowerBiReportServerDashboardSourceReport(SourceReport):
 @config_class(PowerBiReportServerDashboardSourceConfig)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.OWNERSHIP, "Enabled by default")
-class PowerBiReportServerDashboardSource(Source):
+class PowerBiReportServerDashboardSource(StatefulIngestionSourceBase):
     """
     Use this plugin to connect to [PowerBI Report Server](https://powerbi.microsoft.com/en-us/report-server/).
     It extracts the following:
@@ -520,8 +530,9 @@ class PowerBiReportServerDashboardSource(Source):
     def __init__(
         self, config: PowerBiReportServerDashboardSourceConfig, ctx: PipelineContext
     ):
-        super().__init__(ctx)
+        super().__init__(config, ctx)
         self.source_config = config
+        self.ctx = ctx
         self.report = PowerBiReportServerDashboardSourceReport()
         self.auth = PowerBiReportServerAPI(self.source_config).get_auth_credentials
         self.powerbi_client = PowerBiReportServerAPI(self.source_config)
@@ -532,6 +543,14 @@ class PowerBiReportServerDashboardSource(Source):
         config = PowerBiReportServerDashboardSourceConfig.parse_obj(config_dict)
         return cls(config, ctx)
 
+    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
+        return [
+            *super().get_workunit_processors(),
+            StaleEntityRemovalHandler.create(
+                self, self.source_config, self.ctx
+            ).workunit_processor,
+        ]
+
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         """
         Datahub Ingestion framework invoke this method
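The get_workunit_processors additions above (and in the NiFi and Redash diffs) hand the workunit stream to StaleEntityRemovalHandler, which passes workunits through while recording what it saw and then soft-deletes entities that existed in the previous run but are missing from this one. A conceptual, self-contained sketch of that processor-chaining idea; these are simplified stand-ins, not the real DataHub classes:

    from typing import Callable, Iterable, List, Optional

    WorkUnit = str  # stand-in for MetadataWorkUnit
    WorkUnitProcessor = Callable[[Iterable[WorkUnit]], Iterable[WorkUnit]]

    def apply_processors(
        stream: Iterable[WorkUnit], processors: List[Optional[WorkUnitProcessor]]
    ) -> Iterable[WorkUnit]:
        for processor in processors:
            if processor is not None:
                stream = processor(stream)  # each processor wraps the previous stream
        return stream

    def stale_entity_removal(previous_urns: set) -> WorkUnitProcessor:
        def processor(stream: Iterable[WorkUnit]) -> Iterable[WorkUnit]:
            seen: set = set()
            for wu in stream:
                seen.add(wu)
                yield wu                      # real workunits pass through untouched
            for urn in previous_urns - seen:  # entities missing since the last run
                yield f"soft-delete:{urn}"    # stand-in for a Status(removed=True) aspect
        return processor

    wus = ["urn:li:dataset:a", "urn:li:dataset:b"]
    procs = [stale_entity_removal(previous_urns={"urn:li:dataset:a", "urn:li:dataset:zzz"})]
    print(list(apply_processors(iter(wus), procs)))
    # ['urn:li:dataset:a', 'urn:li:dataset:b', 'soft-delete:urn:li:dataset:zzz']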
datahub/ingestion/source/powerbi_report_server/report_server_domain.py
CHANGED

@@ -33,7 +33,7 @@ class CatalogItem(BaseModel):
     )
 
     @validator("display_name", always=True)
-    def validate_diplay_name(cls, value, values):
+    def validate_diplay_name(cls, value, values):
         if values["created_by"]:
             return values["created_by"].split("\\")[-1]
         return ""
datahub/ingestion/source/preset.py
CHANGED

@@ -16,10 +16,13 @@ from datahub.ingestion.api.decorators import (
     support_status,
 )
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
-    StaleEntityRemovalSourceReport,
     StatefulStaleMetadataRemovalConfig,
 )
-from datahub.ingestion.source.superset import
+from datahub.ingestion.source.superset import (
+    SupersetConfig,
+    SupersetSource,
+    SupersetSourceReport,
+)
 from datahub.utilities import config_clean
 
 logger = logging.getLogger(__name__)
@@ -76,7 +79,7 @@ class PresetSource(SupersetSource):
     """
 
     config: PresetConfig
-    report: StaleEntityRemovalSourceReport
+    report: SupersetSourceReport
     platform = "preset"
 
     def __init__(self, ctx: PipelineContext, config: PresetConfig):
@@ -84,7 +87,7 @@ class PresetSource(SupersetSource):
 
         super().__init__(ctx, config)
         self.config = config
-        self.report = StaleEntityRemovalSourceReport()
+        self.report = SupersetSourceReport()
         self.platform = "preset"
 
     def login(self):
datahub/ingestion/source/pulsar.py
CHANGED

@@ -116,6 +116,7 @@ class PulsarSource(StatefulIngestionSourceBase):
     def __init__(self, config: PulsarSourceConfig, ctx: PipelineContext):
         super().__init__(config, ctx)
         self.platform: str = "pulsar"
+        self.ctx = ctx
         self.config: PulsarSourceConfig = config
         self.report: PulsarSourceReport = PulsarSourceReport()
 
@@ -229,8 +230,8 @@ class PulsarSource(StatefulIngestionSourceBase):
             self.report.report_warning("HTTPError", message)
         except requests.exceptions.RequestException as e:
             raise Exception(
-
-            )
+                "An ambiguous exception occurred while handling the request"
+            ) from e
(the removed message line was not rendered in the source diff view)
 
     @classmethod
     def create(cls, config_dict, ctx):
datahub/ingestion/source/qlik_sense/websocket_connection.py
CHANGED

@@ -17,8 +17,9 @@ class WebsocketConnection:
         self.handle = [-1]
 
     def _build_websocket_request_dict(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
+        params = params or {}
         return {
             "jsonrpc": "2.0",
             "id": self.request_id,
@@ -37,11 +38,12 @@ class WebsocketConnection:
         return {}
 
     def websocket_send_request(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
         """
         Method to send request to websocket
         """
+        params = params or {}
         self.request_id += 1
         request = self._build_websocket_request_dict(method, params)
         response = self._send_request(request=request)
datahub/ingestion/source/redash.py
CHANGED

@@ -12,7 +12,7 @@ from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 
 import datahub.emitter.mce_builder as builder
-from datahub.configuration.common import AllowDenyPattern
+from datahub.configuration.common import AllowDenyPattern
 from datahub.emitter.mce_builder import DEFAULT_ENV
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (  # SourceCapability,; capability,
@@ -22,8 +22,20 @@ from datahub.ingestion.api.decorators import (  # SourceCapability,; capability,
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import
+from datahub.ingestion.api.source import (
+    MetadataWorkUnitProcessor,
+    SourceCapability,
+    SourceReport,
+)
 from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalHandler,
+    StaleEntityRemovalSourceReport,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+    StatefulIngestionConfigBase,
+    StatefulIngestionSourceBase,
+)
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
     AuditStamp,
     ChangeAuditStamps,
@@ -235,7 +247,9 @@ def get_full_qualified_name(platform: str, database_name: str, table_name: str)
     return f"{database_name}.{table_name}"
 
 
-class RedashConfig(ConfigModel):
+class RedashConfig(
+    StatefulIngestionConfigBase,
+):
     # See the Redash API for details
     # https://redash.io/help/user-guide/integrations-and-api/api
     connect_uri: str = Field(
@@ -277,7 +291,7 @@ class RedashConfig(ConfigModel):
 
 
 @dataclass
-class RedashSourceReport(SourceReport):
+class RedashSourceReport(StaleEntityRemovalSourceReport):
     items_scanned: int = 0
     filtered: LossyList[str] = field(default_factory=LossyList)
     queries_problem_parsing: LossySet[str] = field(default_factory=LossySet)
@@ -305,7 +319,7 @@ class RedashSourceReport(SourceReport):
 @config_class(RedashConfig)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
-class RedashSource(Source):
+class RedashSource(StatefulIngestionSourceBase):
     """
     This plugin extracts the following:
 
@@ -316,8 +330,9 @@ class RedashSource(Source):
     platform = "redash"
 
     def __init__(self, ctx: PipelineContext, config: RedashConfig):
-        super().__init__(ctx)
+        super().__init__(config, ctx)
         self.config: RedashConfig = config
+        self.ctx = ctx
         self.report: RedashSourceReport = RedashSourceReport()
 
         # Handle trailing slash removal
@@ -406,8 +421,9 @@ class RedashSource(Source):
         return database_name
 
     def _get_datasource_urns(
-        self, data_source: Dict, sql_query_data: Dict = {}
+        self, data_source: Dict, sql_query_data: Optional[Dict] = None
     ) -> Optional[List[str]]:
+        sql_query_data = sql_query_data or {}
         platform = self._get_platform_based_on_datasource(data_source)
         database_name = self._get_database_name_based_on_datasource(data_source)
         data_source_syntax = data_source.get("syntax")
@@ -724,6 +740,14 @@ class RedashSource(Source):
     def add_config_to_report(self) -> None:
         self.report.api_page_limit = self.config.api_page_limit
 
+    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
+        return [
+            *super().get_workunit_processors(),
+            StaleEntityRemovalHandler.create(
+                self, self.config, self.ctx
+            ).workunit_processor,
+        ]
+
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         self.validate_connection()
         self.add_config_to_report()
datahub/ingestion/source/redshift/config.py
CHANGED

@@ -128,6 +128,10 @@ class RedshiftConfig(
         default=True,
         description="Whether lineage should be collected from copy commands",
     )
+    include_share_lineage: bool = Field(
+        default=True,
+        description="Whether lineage should be collected from datashares",
+    )
 
     include_usage_statistics: bool = Field(
         default=False,
|