acryl-datahub 0.15.0.6rc3__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff shows the changes between package versions as published to their respective public registries, and is provided for informational purposes only.
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2552 -2523
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +204 -191
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +1 -1
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +3 -5
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +3 -3
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +6 -12
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +7 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +251 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +29 -5
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +20 -13
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0

datahub/ingestion/run/pipeline.py

```diff
@@ -9,7 +9,7 @@ import sys
 import threading
 import time
 from dataclasses import dataclass
-from typing import Any, Dict, Iterable, Iterator, List, Optional
+from typing import Any, Dict, Iterable, Iterator, List, Optional
 
 import click
 import humanfriendly
@@ -26,7 +26,7 @@ from datahub.ingestion.api.common import EndOfStream, PipelineContext, RecordEnv
 from datahub.ingestion.api.global_context import set_graph_context
 from datahub.ingestion.api.pipeline_run_listener import PipelineRunListener
 from datahub.ingestion.api.report import Report
-from datahub.ingestion.api.sink import Sink, SinkReport
+from datahub.ingestion.api.sink import Sink, SinkReport
 from datahub.ingestion.api.source import Extractor, Source
 from datahub.ingestion.api.transform import Transformer
 from datahub.ingestion.extractor.extractor_registry import extractor_registry
@@ -35,15 +35,15 @@ from datahub.ingestion.reporting.reporting_provider_registry import (
     reporting_provider_registry,
 )
 from datahub.ingestion.run.pipeline_config import PipelineConfig, ReporterConfig
+from datahub.ingestion.run.sink_callback import DeadLetterQueueCallback, LoggingCallback
 from datahub.ingestion.sink.datahub_rest import DatahubRestSink
-from datahub.ingestion.sink.file import FileSink, FileSinkConfig
 from datahub.ingestion.sink.sink_registry import sink_registry
 from datahub.ingestion.source.source_registry import source_registry
 from datahub.ingestion.transformer.system_metadata_transformer import (
     SystemMetadataTransformer,
 )
 from datahub.ingestion.transformer.transform_registry import transform_registry
-from datahub.
+from datahub.sdk._attribution import KnownAttribution, change_default_attribution
 from datahub.telemetry import stats
 from datahub.telemetry.telemetry import telemetry_instance
 from datahub.utilities._custom_package_loader import model_version_name
@@ -57,68 +57,6 @@ logger = logging.getLogger(__name__)
 _REPORT_PRINT_INTERVAL_SECONDS = 60
 
 
-class LoggingCallback(WriteCallback):
-    def __init__(self, name: str = "") -> None:
-        super().__init__()
-        self.name = name
-
-    def on_success(
-        self, record_envelope: RecordEnvelope, success_metadata: dict
-    ) -> None:
-        logger.debug(
-            f"{self.name} sink wrote workunit {record_envelope.metadata['workunit_id']}"
-        )
-
-    def on_failure(
-        self,
-        record_envelope: RecordEnvelope,
-        failure_exception: Exception,
-        failure_metadata: dict,
-    ) -> None:
-        logger.error(
-            f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}",
-            extra={"failure_metadata": failure_metadata},
-            exc_info=failure_exception,
-        )
-
-
-class DeadLetterQueueCallback(WriteCallback):
-    def __init__(self, ctx: PipelineContext, config: Optional[FileSinkConfig]) -> None:
-        if not config:
-            config = FileSinkConfig.parse_obj({"filename": "failed_events.json"})
-        self.file_sink: FileSink = FileSink(ctx, config)
-        self.logging_callback = LoggingCallback(name="failure-queue")
-        logger.info(f"Failure logging enabled. Will log to {config.filename}.")
-
-    def on_success(
-        self, record_envelope: RecordEnvelope, success_metadata: dict
-    ) -> None:
-        pass
-
-    def on_failure(
-        self,
-        record_envelope: RecordEnvelope,
-        failure_exception: Exception,
-        failure_metadata: dict,
-    ) -> None:
-        if "workunit_id" in record_envelope.metadata:
-            if isinstance(record_envelope.record, MetadataChangeProposalClass):
-                mcp = cast(MetadataChangeProposalClass, record_envelope.record)
-                if mcp.systemMetadata:
-                    if not mcp.systemMetadata.properties:
-                        mcp.systemMetadata.properties = {}
-                    if "workunit_id" not in mcp.systemMetadata.properties:
-                        # update the workunit id
-                        mcp.systemMetadata.properties["workunit_id"] = (
-                            record_envelope.metadata["workunit_id"]
-                        )
-                record_envelope.record = mcp
-            self.file_sink.write_record_async(record_envelope, self.logging_callback)
-
-    def close(self) -> None:
-        self.file_sink.close()
-
-
 class PipelineInitError(Exception):
     pass
 
@@ -236,76 +174,99 @@ class Pipeline:
         self.last_time_printed = int(time.time())
         self.cli_report = CliReport()
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            logger.info(
-                "No sink configured, attempting to use the default datahub-rest sink."
-            )
-            with _add_init_error_context("configure the default rest sink"):
-                self.sink_type = "datahub-rest"
-                self.sink = _make_default_rest_sink(self.ctx)
-        else:
-            self.sink_type = self.config.sink.type
-            with _add_init_error_context(
-                f"find a registered sink for type {self.sink_type}"
-            ):
-                sink_class = sink_registry.get(self.sink_type)
-
-            with _add_init_error_context(f"configure the sink ({self.sink_type})"):
-                sink_config = self.config.sink.dict().get("config") or {}
-                self.sink = sink_class.create(sink_config, self.ctx)
-                logger.debug(f"Sink type {self.sink_type} ({sink_class}) configured")
-                logger.info(f"Sink configured successfully. {self.sink.configured()}")
-
-        if self.graph is None and isinstance(self.sink, DatahubRestSink):
-            with _add_init_error_context("setup default datahub client"):
-                self.graph = self.sink.emitter.to_graph()
-                self.graph.test_connection()
-                self.ctx.graph = self.graph
-                telemetry_instance.set_context(server=self.graph)
-
-        with set_graph_context(self.graph):
-            with _add_init_error_context("configure reporters"):
-                self._configure_reporting(report_to)
-
-            with _add_init_error_context(
-                f"find a registered source for type {self.source_type}"
-            ):
-                source_class = source_registry.get(self.source_type)
-
-            with _add_init_error_context(f"configure the source ({self.source_type})"):
-                self.source = source_class.create(
-                    self.config.source.dict().get("config", {}), self.ctx
-                )
-                logger.debug(
-                    f"Source type {self.source_type} ({source_class}) configured"
+        with contextlib.ExitStack() as exit_stack, contextlib.ExitStack() as inner_exit_stack:
+            self.graph: Optional[DataHubGraph] = None
+            with _add_init_error_context("connect to DataHub"):
+                if self.config.datahub_api:
+                    self.graph = exit_stack.enter_context(
+                        DataHubGraph(self.config.datahub_api)
+                    )
+                    self.graph.test_connection()
+
+            with _add_init_error_context("set up framework context"):
+                self.ctx = PipelineContext(
+                    run_id=self.config.run_id,
+                    graph=self.graph,
+                    pipeline_name=self.config.pipeline_name,
+                    dry_run=dry_run,
+                    preview_mode=preview_mode,
+                    pipeline_config=self.config,
                 )
-                logger.info("Source configured successfully.")
 
-
-
-                self.extractor = extractor_class(
-                    self.config.source.extractor_config, self.ctx
+            if self.config.sink is None:
+                logger.info(
+                    "No sink configured, attempting to use the default datahub-rest sink."
                 )
+                with _add_init_error_context("configure the default rest sink"):
+                    self.sink_type = "datahub-rest"
+                    self.sink = exit_stack.enter_context(
+                        _make_default_rest_sink(self.ctx)
+                    )
+            else:
+                self.sink_type = self.config.sink.type
+                with _add_init_error_context(
+                    f"find a registered sink for type {self.sink_type}"
+                ):
+                    sink_class = sink_registry.get(self.sink_type)
+
+                with _add_init_error_context(f"configure the sink ({self.sink_type})"):
+                    sink_config = self.config.sink.dict().get("config") or {}
+                    self.sink = exit_stack.enter_context(
+                        sink_class.create(sink_config, self.ctx)
+                    )
+                    logger.debug(
+                        f"Sink type {self.sink_type} ({sink_class}) configured"
+                    )
+                    logger.info(f"Sink configured successfully. {self.sink.configured()}")
+
+            if self.graph is None and isinstance(self.sink, DatahubRestSink):
+                with _add_init_error_context("setup default datahub client"):
+                    self.graph = self.sink.emitter.to_graph()
+                    self.graph.test_connection()
+                    self.ctx.graph = self.graph
+                    telemetry_instance.set_context(server=self.graph)
+
+            with set_graph_context(self.graph):
+                with _add_init_error_context("configure reporters"):
+                    self._configure_reporting(report_to)
 
-
-
+                with _add_init_error_context(
+                    f"find a registered source for type {self.source_type}"
+                ):
+                    source_class = source_registry.get(self.source_type)
+
+                with _add_init_error_context(
+                    f"configure the source ({self.source_type})"
+                ):
+                    self.source = inner_exit_stack.enter_context(
+                        source_class.create(
+                            self.config.source.dict().get("config", {}), self.ctx
+                        )
+                    )
+                    logger.debug(
+                        f"Source type {self.source_type} ({source_class}) configured"
+                    )
+                    logger.info("Source configured successfully.")
+
+                extractor_type = self.config.source.extractor
+                with _add_init_error_context(
+                    f"configure the extractor ({extractor_type})"
+                ):
+                    extractor_class = extractor_registry.get(extractor_type)
+                    self.extractor = inner_exit_stack.enter_context(
+                        extractor_class(self.config.source.extractor_config, self.ctx)
+                    )
+
+                with _add_init_error_context("configure transformers"):
+                    self._configure_transforms()
+
+            # If all of the initialization succeeds, we can preserve the exit stack until the pipeline run.
+            # We need to use an exit stack so that if we have an exception during initialization,
+            # things that were already initialized are still cleaned up.
+            # We need to separate the source/extractor from the rest because stateful
+            # ingestion requires the source to be closed before the state can be updated.
+            self.inner_exit_stack = inner_exit_stack.pop_all()
+            self.exit_stack = exit_stack.pop_all()
 
     @property
     def source_type(self) -> str:
@@ -439,18 +400,20 @@ class Pipeline:
             return True
         return False
 
-    def run(self) -> None:
-        with
+    def run(self) -> None:
+        with self.exit_stack, self.inner_exit_stack:
             if self.config.flags.generate_memory_profiles:
                 import memray
 
-
+                self.exit_stack.enter_context(
                     memray.Tracker(
                        f"{self.config.flags.generate_memory_profiles}/{self.config.run_id}.bin"
                     )
                 )
 
-
+            self.exit_stack.enter_context(
+                change_default_attribution(KnownAttribution.INGESTION)
+            )
 
             self.final_status = PipelineStatus.UNKNOWN
             self._notify_reporters_on_ingestion_start()
@@ -459,8 +422,10 @@ class Pipeline:
             callback = (
                 LoggingCallback()
                 if not self.config.failure_log.enabled
-                else
-
+                else self.exit_stack.enter_context(
+                    DeadLetterQueueCallback(
+                        self.ctx, self.config.failure_log.log_config
+                    )
                 )
             )
             for wu in itertools.islice(
@@ -506,12 +471,11 @@ class Pipeline:
                         "Failed to process some records. Continuing.",
                         exc_info=e,
                     )
-                    # TODO: Transformer errors should
+                    # TODO: Transformer errors should be reported more loudly / as part of the pipeline report.
 
                 if not self.dry_run:
                     self.sink.handle_work_unit_end(wu)
-
-            self.source.close()
+
             # no more data is coming, we need to let the transformers produce any additional records if they are holding on to state
             for record_envelope in self.transform(
                 [
@@ -527,6 +491,11 @@ class Pipeline:
                 # TODO: propagate EndOfStream and other control events to sinks, to allow them to flush etc.
                 self.sink.write_record_async(record_envelope, callback)
 
+            # Stateful ingestion generates the updated state objects as part of the
+            # source's close method. Because of that, we need to close the source
+            # before we call process_commits.
+            self.inner_exit_stack.close()
+
             self.process_commits()
             self.final_status = PipelineStatus.COMPLETED
         except (SystemExit, KeyboardInterrupt) as e:
@@ -539,9 +508,6 @@ class Pipeline:
         finally:
             clear_global_warnings()
 
-            if callback and hasattr(callback, "close"):
-                callback.close()  # type: ignore
-
             self._notify_reporters_on_ingestion_completion()
 
     def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]:
@@ -560,10 +526,8 @@ class Pipeline:
         Evaluates the commit_policy for each committable in the context and triggers the commit operation
         on the committable if its required commit policies are satisfied.
         """
-        has_errors: bool = (
-            True
-            if self.source.get_report().failures or self.sink.get_report().failures
-            else False
+        has_errors: bool = bool(
+            self.source.get_report().failures or self.sink.get_report().failures
         )
         has_warnings: bool = bool(
            self.source.get_report().warnings or self.sink.get_report().warnings
```

datahub/ingestion/run/sink_callback.py (new file)

```diff
@@ -0,0 +1,77 @@
+import logging
+import threading
+from typing import Optional
+
+from datahub.ingestion.api.closeable import Closeable
+from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
+from datahub.ingestion.api.sink import WriteCallback
+from datahub.ingestion.sink.file import FileSink, FileSinkConfig
+from datahub.metadata.schema_classes import MetadataChangeProposalClass
+
+logger = logging.getLogger(__name__)
+
+
+class LoggingCallback(WriteCallback):
+    def __init__(self, name: str = "") -> None:
+        super().__init__()
+        self.name = name
+
+    def on_success(
+        self, record_envelope: RecordEnvelope, success_metadata: dict
+    ) -> None:
+        logger.debug(
+            f"{self.name} sink wrote workunit {record_envelope.metadata['workunit_id']}"
+        )
+
+    def on_failure(
+        self,
+        record_envelope: RecordEnvelope,
+        failure_exception: Exception,
+        failure_metadata: dict,
+    ) -> None:
+        logger.error(
+            f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}",
+            extra={"failure_metadata": failure_metadata},
+            exc_info=failure_exception,
+        )
+
+
+class DeadLetterQueueCallback(WriteCallback, Closeable):
+    def __init__(self, ctx: PipelineContext, config: Optional[FileSinkConfig]) -> None:
+        if not config:
+            config = FileSinkConfig.parse_obj({"filename": "failed_events.json"})
+        self.file_sink: FileSink = FileSink(ctx, config)
+        self.file_sink_lock = threading.Lock()
+        self.logging_callback = LoggingCallback(name="failure-queue")
+        logger.info(f"Failure logging enabled. Will log to {config.filename}.")
+
+    def on_success(
+        self, record_envelope: RecordEnvelope, success_metadata: dict
+    ) -> None:
+        pass
+
+    def on_failure(
+        self,
+        record_envelope: RecordEnvelope,
+        failure_exception: Exception,
+        failure_metadata: dict,
+    ) -> None:
+        if "workunit_id" in record_envelope.metadata and isinstance(
+            record_envelope.record, MetadataChangeProposalClass
+        ):
+            mcp: MetadataChangeProposalClass = record_envelope.record
+            if mcp.systemMetadata:
+                if not mcp.systemMetadata.properties:
+                    mcp.systemMetadata.properties = {}
+                if "workunit_id" not in mcp.systemMetadata.properties:
+                    # update the workunit id
+                    mcp.systemMetadata.properties["workunit_id"] = (
+                        record_envelope.metadata["workunit_id"]
+                    )
+                record_envelope.record = mcp
+        with self.file_sink_lock:
+            self.file_sink.write_record_async(record_envelope, self.logging_callback)
+
+    def close(self) -> None:
+        with self.file_sink_lock:
+            self.file_sink.close()
```
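
The new `sink_callback` module extracts these callbacks from `pipeline.py` and makes the dead-letter queue a proper `Closeable` guarded by a lock. A hypothetical usage sketch, assuming the package is installed; the URN, run id, and filename are illustrative:

```python
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
from datahub.ingestion.run.sink_callback import DeadLetterQueueCallback
from datahub.ingestion.sink.file import FileSinkConfig
from datahub.metadata.schema_classes import StatusClass

ctx = PipelineContext(run_id="dlq-demo")
dlq = DeadLetterQueueCallback(
    ctx, FileSinkConfig.parse_obj({"filename": "failed_events.json"})
)

envelope = RecordEnvelope(
    record=MetadataChangeProposalWrapper(
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:demo,example_table,PROD)",
        aspect=StatusClass(removed=False),
    ),
    metadata={"workunit_id": "demo-workunit-0"},
)

# A sink would normally invoke this from write_record_async when a write fails.
# The failed record is appended to failed_events.json; the workunit-id stamping
# branch above applies only to raw MetadataChangeProposalClass records.
dlq.on_failure(envelope, Exception("simulated write failure"), {})
dlq.close()
```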

datahub/ingestion/sink/datahub_rest.py

```diff
@@ -20,7 +20,11 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import mcps_from_mce
 from datahub.emitter.rest_emitter import (
     BATCH_INGEST_MAX_PAYLOAD_LENGTH,
+    DEFAULT_REST_SINK_ENDPOINT,
+    DEFAULT_REST_TRACE_MODE,
     DataHubRestEmitter,
+    RestSinkEndpoint,
+    RestTraceMode,
 )
 from datahub.ingestion.api.common import RecordEnvelope, WorkUnit
 from datahub.ingestion.api.sink import (
@@ -66,6 +70,8 @@ _DEFAULT_REST_SINK_MODE = pydantic.parse_obj_as(
 
 class DatahubRestSinkConfig(DatahubClientConfig):
     mode: RestSinkMode = _DEFAULT_REST_SINK_MODE
+    endpoint: RestSinkEndpoint = DEFAULT_REST_SINK_ENDPOINT
+    default_trace_mode: RestTraceMode = DEFAULT_REST_TRACE_MODE
 
     # These only apply in async modes.
     max_threads: pydantic.PositiveInt = _DEFAULT_REST_SINK_MAX_THREADS
@@ -172,6 +178,8 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
             ca_certificate_path=config.ca_certificate_path,
             client_certificate_path=config.client_certificate_path,
             disable_ssl_verification=config.disable_ssl_verification,
+            openapi_ingestion=config.endpoint == RestSinkEndpoint.OPENAPI,
+            default_trace_mode=config.default_trace_mode == RestTraceMode.ENABLED,
         )
 
     @property
```
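
The sink config gains `endpoint` and `default_trace_mode` knobs that are translated into the emitter's `openapi_ingestion` and trace flags. A hedged sketch of setting them programmatically (server URL and token are placeholders; only the enum members visible in this diff are used):

```python
from datahub.emitter.rest_emitter import RestSinkEndpoint, RestTraceMode
from datahub.ingestion.sink.datahub_rest import DatahubRestSinkConfig

config = DatahubRestSinkConfig(
    server="http://localhost:8080",
    token="<personal-access-token>",
    # Route writes through the OpenAPI endpoint instead of the default one.
    endpoint=RestSinkEndpoint.OPENAPI,
    # Opt in to end-to-end request tracing.
    default_trace_mode=RestTraceMode.ENABLED,
)
print(config.endpoint, config.default_trace_mode)
```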

datahub/ingestion/source/abs/config.py

```diff
@@ -144,10 +144,8 @@ class DataLakeSourceConfig(
         return path_specs
 
     @pydantic.validator("platform", always=True)
-    def platform_not_empty(cls, platform:
-        inferred_platform = values.get(
-            "platform", None
-        )  # we may have inferred it above
+    def platform_not_empty(cls, platform: Any, values: dict) -> str:
+        inferred_platform = values.get("platform")  # we may have inferred it above
         platform = platform or inferred_platform
         if not platform:
             raise ValueError("platform must not be empty")
```
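
The corrected signature relies on pydantic v1 passing already-validated fields to the validator through `values`, which is what lets an inferred platform stand in for a missing one. A standalone illustration of that pattern (the model and inference rule below are invented for the example):

```python
import pydantic


class ExampleDataLakeConfig(pydantic.BaseModel):
    path: str
    platform: str = ""

    @pydantic.validator("platform", always=True)
    def platform_not_empty(cls, platform: str, values: dict) -> str:
        # `values` holds the fields validated so far (here: `path`), so a
        # platform can be inferred when the user did not set one explicitly.
        inferred_platform = "s3" if values.get("path", "").startswith("s3://") else ""
        platform = platform or inferred_platform
        if not platform:
            raise ValueError("platform must not be empty")
        return platform


print(ExampleDataLakeConfig(path="s3://bucket/prefix").platform)  # -> s3
```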

datahub/ingestion/source/bigquery_v2/bigquery_audit.py

```diff
@@ -165,7 +165,7 @@ class BigQueryTableRef:
     @classmethod
     def from_spec_obj(cls, spec: dict) -> "BigQueryTableRef":
         for key in ["projectId", "datasetId", "tableId"]:
-            if key not in spec
+            if key not in spec:
                 raise ValueError(f"invalid BigQuery table reference dict: {spec}")
 
         return cls(
```
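
A small usage sketch of the validation shown above (project, dataset, and table names are made up; the body of the `cls(...)` call is not shown in this diff):

```python
from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigQueryTableRef

ref = BigQueryTableRef.from_spec_obj(
    {"projectId": "my-project", "datasetId": "my_dataset", "tableId": "my_table"}
)
print(ref)

try:
    BigQueryTableRef.from_spec_obj({"projectId": "my-project"})
except ValueError as e:
    # invalid BigQuery table reference dict: {'projectId': 'my-project'}
    print(e)
```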

datahub/ingestion/source/bigquery_v2/bigquery_config.py

```diff
@@ -1,8 +1,6 @@
-import json
 import logging
 import os
 import re
-import tempfile
 from datetime import timedelta
 from typing import Any, Dict, List, Optional, Union
 
@@ -17,10 +15,10 @@ from datahub.configuration.source_common import (
     PlatformInstanceConfigMixin,
 )
 from datahub.configuration.validate_field_removal import pydantic_removed_field
-from datahub.configuration.validate_multiline_string import pydantic_multiline_string
 from datahub.ingestion.glossary.classification_mixin import (
     ClassificationSourceConfigMixin,
 )
+from datahub.ingestion.source.common.gcp_credentials_config import GCPCredential
 from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
 from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, SQLFilterConfig
 from datahub.ingestion.source.state.stateful_ingestion_base import (
@@ -107,50 +105,8 @@ class BigQueryUsageConfig(BaseUsageConfig):
     )
 
 
-class BigQueryCredential(ConfigModel):
-    project_id: str = Field(description="Project id to set the credentials")
-    private_key_id: str = Field(description="Private key id")
-    private_key: str = Field(
-        description="Private key in a form of '-----BEGIN PRIVATE KEY-----\\nprivate-key\\n-----END PRIVATE KEY-----\\n'"
-    )
-    client_email: str = Field(description="Client email")
-    client_id: str = Field(description="Client Id")
-    auth_uri: str = Field(
-        default="https://accounts.google.com/o/oauth2/auth",
-        description="Authentication uri",
-    )
-    token_uri: str = Field(
-        default="https://oauth2.googleapis.com/token", description="Token uri"
-    )
-    auth_provider_x509_cert_url: str = Field(
-        default="https://www.googleapis.com/oauth2/v1/certs",
-        description="Auth provider x509 certificate url",
-    )
-    type: str = Field(default="service_account", description="Authentication type")
-    client_x509_cert_url: Optional[str] = Field(
-        default=None,
-        description="If not set it will be default to https://www.googleapis.com/robot/v1/metadata/x509/client_email",
-    )
-
-    _fix_private_key_newlines = pydantic_multiline_string("private_key")
-
-    @root_validator(skip_on_failure=True)
-    def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
-        if values.get("client_x509_cert_url") is None:
-            values["client_x509_cert_url"] = (
-                f"https://www.googleapis.com/robot/v1/metadata/x509/{values['client_email']}"
-            )
-        return values
-
-    def create_credential_temp_file(self) -> str:
-        with tempfile.NamedTemporaryFile(delete=False) as fp:
-            cred_json = json.dumps(self.dict(), indent=4, separators=(",", ": "))
-            fp.write(cred_json.encode())
-            return fp.name
-
-
 class BigQueryConnectionConfig(ConfigModel):
-    credential: Optional[
+    credential: Optional[GCPCredential] = Field(
         default=None, description="BigQuery credential informations"
     )
 
```
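
`BigQueryCredential` is replaced by the shared `GCPCredential` model in `gcp_credentials_config.py` (+53 lines, not shown here), so its exact API is not assumed. A minimal sketch of the temp-credential-file approach the removed `create_credential_temp_file` used, which Google client libraries then pick up via `GOOGLE_APPLICATION_CREDENTIALS`:

```python
import json
import os
import tempfile


def write_credential_temp_file(credential: dict) -> str:
    """Serialize a service-account credential dict to a temp file and return its path."""
    with tempfile.NamedTemporaryFile(delete=False) as fp:
        fp.write(json.dumps(credential, indent=4, separators=(",", ": ")).encode())
        return fp.name


# Field values are placeholders; the keys mirror the removed BigQueryCredential model.
credential = {
    "type": "service_account",
    "project_id": "my-project",
    "private_key_id": "abc123",
    "private_key": "-----BEGIN PRIVATE KEY-----\nprivate-key\n-----END PRIVATE KEY-----\n",
    "client_email": "ingest@my-project.iam.gserviceaccount.com",
    "client_id": "1234567890",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
}
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = write_credential_temp_file(credential)
```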

datahub/ingestion/source/bigquery_v2/bigquery_schema.py

```diff
@@ -292,6 +292,11 @@ class BigQuerySchemaApi:
                     if hasattr(d, "_properties") and isinstance(d._properties, dict)
                     else None
                 ),
+                # TODO: Fetch dataset description individually impacts overall performance if the number of datasets is high (hundreds); instead we should fetch in batch for all datasets.
+                # TODO: Given we are calling get_dataset for each dataset, we may consume and publish other fields too, such as created, modified, etc...
+                # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_get_dataset
+                # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.dataset.Dataset
+                comment=self.bq_client.get_dataset(d.reference).description,
             )
             for d in datasets
         ]
@@ -339,7 +344,7 @@
         with_partitions: bool = False,
     ) -> Iterator[BigqueryTable]:
         with PerfTimer() as current_timer:
-            filter_clause: str = ", ".join(f"'{table}'" for table in tables
+            filter_clause: str = ", ".join(f"'{table}'" for table in tables)
 
             if with_partitions:
                 query_template = BigqueryQuery.tables_for_dataset
```
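
The new `comment=...` line costs one `get_dataset` call per dataset because `list_datasets()` returns lightweight items without a description, which is exactly what the TODO above flags. A sketch of that lookup pattern with the google-cloud-bigquery client (project name is illustrative):

```python
from google.cloud import bigquery

client = bigquery.Client(project="my-project")

descriptions = {}
for item in client.list_datasets():  # DatasetListItem has no description field
    dataset = client.get_dataset(item.reference)  # full Dataset exposes .description
    descriptions[item.dataset_id] = dataset.description

print(descriptions)
```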

datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py

```diff
@@ -296,6 +296,7 @@ class BigQuerySchemaGenerator:
         self,
         dataset: str,
         project_id: str,
+        description: Optional[str] = None,
         tags: Optional[Dict[str, str]] = None,
         extra_properties: Optional[Dict[str, str]] = None,
     ) -> Iterable[MetadataWorkUnit]:
@@ -336,6 +337,7 @@
             domain_config=self.config.domain,
             schema_container_key=schema_container_key,
             database_container_key=database_container_key,
+            description=description,
             external_url=(
                 BQ_EXTERNAL_DATASET_URL_TEMPLATE.format(
                     project=project_id, dataset=dataset
@@ -471,14 +473,15 @@
 
         if self.config.include_schema_metadata:
             yield from self.gen_dataset_containers(
-                dataset_name,
-                project_id,
-                bigquery_dataset.labels,
-                (
+                dataset=dataset_name,
+                project_id=project_id,
+                tags=bigquery_dataset.labels,
+                extra_properties=(
                     {"location": bigquery_dataset.location}
                     if bigquery_dataset.location
                     else None
                 ),
+                description=bigquery_dataset.comment,
             )
 
             columns = None
```