acryl-datahub 1.1.1rc3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/METADATA +2559 -2532
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/RECORD +226 -190
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +239 -0
- datahub/api/entities/external/external_tag.py +145 -0
- datahub/api/entities/external/lake_formation_external_entites.py +161 -0
- datahub/api/entities/external/restricted_text.py +247 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +124 -27
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +12 -16
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/emitter/response_helper.py +86 -1
- datahub/emitter/rest_emitter.py +71 -13
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +48 -44
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +100 -15
- datahub/ingestion/graph/config.py +1 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +13 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +4 -0
- datahub/ingestion/source/aws/glue.py +489 -244
- datahub/ingestion/source/aws/tag_entities.py +292 -0
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/config.py +11 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +187 -35
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
- datahub/ingestion/source/dbt/dbt_common.py +6 -2
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +94 -81
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +76 -28
- datahub/ingestion/source/ge_profiling_config.py +11 -0
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/openapi.py +12 -0
- datahub/ingestion/source/openapi_parser.py +56 -37
- datahub/ingestion/source/powerbi/powerbi.py +1 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +21 -1
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +43 -7
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +33 -8
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -11
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -11
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +239 -34
- datahub/ingestion/source/sql/mysql.py +0 -1
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +121 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +58 -3
- datahub/ingestion/source/tableau/tableau.py +58 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/config.py +5 -0
- datahub/ingestion/source/unity/proxy.py +118 -0
- datahub/ingestion/source/unity/source.py +195 -17
- datahub/ingestion/source/unity/tag_entities.py +295 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1446 -559
- datahub/metadata/_urns/urn_defs.py +1721 -1553
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +18055 -17802
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +1 -0
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/sdk/__init__.py +6 -0
- datahub/sdk/_all_entities.py +11 -0
- datahub/sdk/_shared.py +118 -1
- datahub/sdk/chart.py +315 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +432 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +8 -2
- datahub/sdk/entity_client.py +90 -2
- datahub/sdk/lineage_client.py +683 -82
- datahub/sdk/main_client.py +46 -16
- datahub/sdk/mlmodel.py +101 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +4 -3
- datahub/specific/chart.py +1 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +62 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/top_level.txt +0 -0
datahub/sql_parsing/sqlglot_lineage.py
CHANGED

@@ -56,6 +56,7 @@ from datahub.sql_parsing.sql_parsing_common import (
     QueryTypeProps,
 )
 from datahub.sql_parsing.sqlglot_utils import (
+    DialectOrStr,
     get_dialect,
     get_query_fingerprint_debug,
     is_dialect_instance,
@@ -124,6 +125,17 @@ class _DownstreamColumnRef(_ParserBaseModel):


 class DownstreamColumnRef(_ParserBaseModel):
+    """
+    TODO: Instead of implementing custom __hash__ function this class should simply inherit from _FrozenModel.
+    What stops us is that `column_type` field of type `SchemaFieldDataTypeClass` is not hashable - it's an
+    auto-generated class from .pdl model files. We need generic solution allowing us to either:
+    1. Implement hashing for .pdl model objects
+    2. Reliably provide pydantic (both v1 and v2) with information to skip particular fields from default
+       hash function - with a twist here that _FrozenModel implements its own `__lt__` function - it needs
+       to understand that instruction as well.
+    Instances of this class needs to be hashable as we store them in a set when processing lineage from queries.
+    """
+
     table: Optional[Urn] = None
     column: str
     column_type: Optional[SchemaFieldDataTypeClass] = None
@@ -139,8 +151,11 @@ class DownstreamColumnRef(_ParserBaseModel):
             return v
         return SchemaFieldDataTypeClass.from_obj(v)

+    def __hash__(self) -> int:
+        return hash((self.table, self.column, self.native_column_type))
+

-class ColumnTransformation(
+class ColumnTransformation(_FrozenModel):
     is_direct_copy: bool
     column_logic: str

@@ -153,11 +168,21 @@ class _ColumnLineageInfo(_ParserBaseModel):


 class ColumnLineageInfo(_ParserBaseModel):
+    """
+    TODO: Instead of implementing custom __hash__ function this class should simply inherit from _FrozenModel.
+    To achieve this, we need to change `upstreams` to `Tuple[ColumnRef, ...]` - along with many code lines
+    depending on it.
+    Instances of this class needs to be hashable as we store them in a set when processing lineage from queries.
+    """
+
     downstream: DownstreamColumnRef
     upstreams: List[ColumnRef]

     logic: Optional[ColumnTransformation] = pydantic.Field(default=None)

+    def __hash__(self) -> int:
+        return hash((self.downstream, tuple(self.upstreams), self.logic))
+

 class _JoinInfo(_ParserBaseModel):
     join_type: str
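Both TODO docstrings above describe the same constraint: these models must be usable as set members during lineage processing, but some of their fields are not hashable, so each class hashes only a stable subset of its fields. A minimal standalone sketch of that pattern (plain Python stand-ins, not the pydantic models themselves; names and values are illustrative):

```python
from typing import Optional


class HashableColumnRef:
    """Stand-in for DownstreamColumnRef: hash only the hashable fields."""

    def __init__(
        self,
        table: Optional[str],
        column: str,
        native_column_type: Optional[str] = None,
    ) -> None:
        self.table = table
        self.column = column
        # The unhashable column_type field is deliberately left out of
        # __hash__/__eq__, mirroring the workaround in the diff above.
        self.native_column_type = native_column_type

    def __hash__(self) -> int:
        return hash((self.table, self.column, self.native_column_type))

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, HashableColumnRef):
            return NotImplemented
        return (self.table, self.column, self.native_column_type) == (
            other.table,
            other.column,
            other.native_column_type,
        )


# Duplicates collapse once __hash__ and __eq__ agree, which is what storing
# column refs in a set during lineage extraction relies on.
refs = {
    HashableColumnRef("urn:li:dataset:users", "id"),
    HashableColumnRef("urn:li:dataset:users", "id"),
}
assert len(refs) == 1
```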
@@ -1231,12 +1256,12 @@ def _sqlglot_lineage_inner(
     schema_resolver: SchemaResolverInterface,
     default_db: Optional[str] = None,
     default_schema: Optional[str] = None,
-
+    override_dialect: Optional[DialectOrStr] = None,
 ) -> SqlParsingResult:
-    if
-        dialect = get_dialect(
+    if override_dialect:
+        dialect = get_dialect(override_dialect)
     else:
-        dialect = get_dialect(
+        dialect = get_dialect(schema_resolver.platform)

     default_db = _normalize_db_or_schema(default_db, dialect)
     default_schema = _normalize_db_or_schema(default_schema, dialect)
@@ -1423,7 +1448,7 @@ def _sqlglot_lineage_nocache(
     schema_resolver: SchemaResolverInterface,
     default_db: Optional[str] = None,
     default_schema: Optional[str] = None,
-
+    override_dialect: Optional[DialectOrStr] = None,
 ) -> SqlParsingResult:
     """Parse a SQL statement and generate lineage information.

@@ -1441,8 +1466,8 @@ def _sqlglot_lineage_nocache(
     can be brittle with respect to missing schema information and complex
     SQL logic like UNNESTs.

-    The SQL dialect
-    be
+    The SQL dialect will be inferred from the schema_resolver's platform.
+    That inference can be overridden by passing an override_dialect argument.
     The set of supported dialects is the same as sqlglot's. See their
     `documentation <https://sqlglot.com/sqlglot/dialects/dialect.html#Dialects>`_
     for the full list.
@@ -1457,7 +1482,7 @@ def _sqlglot_lineage_nocache(
         schema_resolver: The schema resolver to use for resolving table schemas.
         default_db: The default database to use for unqualified table names.
         default_schema: The default schema to use for unqualified table names.
-
+        override_dialect: Override the dialect provided by 'schema_resolver'.

     Returns:
         A SqlParsingResult object containing the parsed lineage information.
@@ -1482,10 +1507,32 @@ def _sqlglot_lineage_nocache(
             schema_resolver=schema_resolver,
             default_db=default_db,
             default_schema=default_schema,
-
+            override_dialect=override_dialect,
         )
     except Exception as e:
         return SqlParsingResult.make_from_error(e)
+    except BaseException as e:
+        # Check if this is a PanicException from SQLGlot's Rust tokenizer
+        # We use runtime type checking instead of isinstance() because pyo3_runtime
+        # is only available when sqlglot[rs] is installed and may not be importable
+        # at module load time, but the exception can still be raised at runtime
+        if (
+            e.__class__.__name__ == "PanicException"
+            and e.__class__.__module__ == "pyo3_runtime"
+        ):
+            # Handle pyo3_runtime.PanicException from SQLGlot's Rust tokenizer.
+            # pyo3_runtime.PanicException inherits from BaseException (like SystemExit or
+            # KeyboardInterrupt) rather than Exception, so it bypasses normal exception handling.
+            # Avoid catching BaseException, as it includes KeyboardInterrupt
+            # and would prevent Ctrl+C from working.
+            wrapped_exception = Exception(
+                f"pyo3_runtime.PanicException during SQL parsing: {e}"
+            )
+            wrapped_exception.__cause__ = e
+            return SqlParsingResult.make_from_error(wrapped_exception)
+        else:
+            # Re-raise other BaseException types (SystemExit, KeyboardInterrupt, etc.)
+            raise


 _sqlglot_lineage_cached = functools.lru_cache(maxsize=SQL_PARSE_RESULT_CACHE_SIZE)(
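The new `except BaseException` branch matches the exception by class name and module rather than with `isinstance()`, because `pyo3_runtime` only exists when `sqlglot[rs]` is installed. The detect-and-wrap pattern generalizes; a standalone sketch (the helper names here are mine, not datahub's):

```python
def is_pyo3_panic(exc: BaseException) -> bool:
    # Match by name/module so this works even when pyo3_runtime cannot be
    # imported in the current process.
    return (
        exc.__class__.__name__ == "PanicException"
        and exc.__class__.__module__ == "pyo3_runtime"
    )


def wrap_panic(exc: BaseException) -> Exception:
    # Re-wrap as a plain Exception so ordinary handlers can catch it, while
    # __cause__ keeps the original traceback chained for debugging.
    wrapped = Exception(f"pyo3_runtime.PanicException during SQL parsing: {exc}")
    wrapped.__cause__ = exc
    return wrapped
```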
@@ -1498,15 +1545,15 @@ def sqlglot_lineage(
     schema_resolver: SchemaResolverInterface,
     default_db: Optional[str] = None,
     default_schema: Optional[str] = None,
-
+    override_dialect: Optional[DialectOrStr] = None,
 ) -> SqlParsingResult:
     if schema_resolver.includes_temp_tables():
         return _sqlglot_lineage_nocache(
-            sql, schema_resolver, default_db, default_schema,
+            sql, schema_resolver, default_db, default_schema, override_dialect
         )
     else:
         return _sqlglot_lineage_cached(
-            sql, schema_resolver, default_db, default_schema,
+            sql, schema_resolver, default_db, default_schema, override_dialect
         )


@@ -1558,6 +1605,7 @@ def create_lineage_sql_parsed_result(
     default_schema: Optional[str] = None,
     graph: Optional[DataHubGraph] = None,
     schema_aware: bool = True,
+    override_dialect: Optional[DialectOrStr] = None,
 ) -> SqlParsingResult:
     schema_resolver = create_schema_resolver(
         platform=platform,
@@ -1577,6 +1625,7 @@ def create_lineage_sql_parsed_result(
             schema_resolver=schema_resolver,
             default_db=default_db,
             default_schema=default_schema,
+            override_dialect=override_dialect,
         )
     except Exception as e:
         return SqlParsingResult.make_from_error(e)
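Taken together, these hunks thread the new `override_dialect` parameter from the public entry points down to `_sqlglot_lineage_inner`. A hedged usage sketch — the surrounding parameter names follow the existing `create_lineage_sql_parsed_result` signature and should be treated as illustrative:

```python
from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result

result = create_lineage_sql_parsed_result(
    query="SELECT id, name FROM raw.users",
    default_db="analytics",
    platform="snowflake",
    platform_instance=None,
    env="PROD",
    # Normally the dialect is inferred from the platform; override_dialect
    # forces a specific sqlglot dialect instead.
    override_dialect="snowflake",
)
print(result.in_tables, result.out_tables)
```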
datahub/telemetry/telemetry.py
CHANGED

@@ -104,7 +104,7 @@ SENTRY_DSN: Optional[str] = os.environ.get("SENTRY_DSN", None)
 SENTRY_ENVIRONMENT: str = os.environ.get("SENTRY_ENVIRONMENT", "dev")


-def
+def _default_global_properties() -> Dict[str, Any]:
     return {
         "datahub_version": nice_version_name(),
         "python_version": platform.python_version(),
@@ -122,6 +122,7 @@ class Telemetry:
     context_properties: Dict[str, Any] = {}

     def __init__(self):
+        self.global_properties = _default_global_properties()
         self.context_properties = {}

         if SENTRY_DSN:
@@ -247,6 +248,10 @@ class Telemetry:

         return False

+    def add_global_property(self, key: str, value: Any) -> None:
+        self.global_properties[key] = value
+        self._update_sentry_properties()
+
     def set_context(
         self,
         server: Optional["DataHubGraph"] = None,
@@ -257,16 +262,17 @@ class Telemetry:
             **(properties or {}),
         }

-
-        from sentry_sdk import set_tag
+        self._update_sentry_properties()

-
-
-
-
+    def _update_sentry_properties(self) -> None:
+        properties = {
+            **self.global_properties,
+            **self.context_properties,
+        }
+        if self.sentry_enabled:
+            import sentry_sdk

-
-            set_tag(key, properties[key])
+            sentry_sdk.set_tags(properties)

     def init_capture_exception(self) -> None:
         if self.sentry_enabled:
@@ -300,7 +306,7 @@ class Telemetry:
             try:
                 self.mp.people_set(
                     self.client_id,
-
+                    self.global_properties,
                 )
             except Exception as e:
                 logger.debug(f"Error initializing telemetry: {e}")
@@ -334,7 +340,7 @@ class Telemetry:
             logger.debug(f"Sending telemetry for {event_name}")

             properties = {
-                **
+                **self.global_properties,
                 **self.context_properties,
                 **properties,
             }
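With the global properties now held as instance state, callers can extend them at runtime and the Sentry tags are re-synced through `_update_sentry_properties`. A hedged sketch, assuming the module-level `telemetry_instance` singleton that the CLI uses:

```python
from datahub.telemetry.telemetry import telemetry_instance

# The property is merged into every subsequent event payload and pushed to
# Sentry tags via _update_sentry_properties.
telemetry_instance.add_global_property("deployment_mode", "quickstart")
```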
datahub/testing/sdk_v2_helpers.py
CHANGED

@@ -1,12 +1,18 @@
 import pathlib
+from typing import Sequence

 from datahub.sdk.entity import Entity
 from datahub.testing import mce_helpers


-def assert_entity_golden(
+def assert_entity_golden(
+    entity: Entity,
+    golden_path: pathlib.Path,
+    ignore_paths: Sequence[str] = (),
+) -> None:
     mce_helpers.check_goldens_stream(
         outputs=entity.as_mcps(),
         golden_path=golden_path,
         ignore_order=False,
+        ignore_paths=ignore_paths,
     )
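A hedged usage sketch for the new `ignore_paths` parameter, which is forwarded to the golden-file comparison so volatile fields can be excluded. The entity construction and the path regex are illustrative, not taken from the diff:

```python
import pathlib

from datahub.sdk.dataset import Dataset
from datahub.testing.sdk_v2_helpers import assert_entity_golden

dataset = Dataset(platform="snowflake", name="analytics.users")
assert_entity_golden(
    dataset,
    pathlib.Path("tests/goldens/dataset_golden.json"),
    # Exclusion paths are passed through to check_goldens_stream; the exact
    # regex format follows whatever that helper accepts (illustrative here).
    ignore_paths=(r"root\[\d+\]\['systemMetadata'\]",),
)
```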
datahub/upgrade/upgrade.py
CHANGED

@@ -32,6 +32,7 @@ class ServerVersionStats(BaseModel):
     current: VersionStats
     latest: Optional[VersionStats] = None
     current_server_type: Optional[str] = None
+    current_server_default_cli_version: Optional[VersionStats] = None


 class ClientVersionStats(BaseModel):
@@ -44,7 +45,7 @@ class DataHubVersionStats(BaseModel):
     client: ClientVersionStats


-async def get_client_version_stats():
+async def get_client_version_stats() -> ClientVersionStats:
     import aiohttp

     current_version_string = __version__
@@ -52,6 +53,7 @@ async def get_client_version_stats():
     client_version_stats: ClientVersionStats = ClientVersionStats(
         current=VersionStats(version=current_version, release_date=None), latest=None
     )
+
     async with aiohttp.ClientSession() as session:
         pypi_url = "https://pypi.org/pypi/acryl_datahub/json"
         async with session.get(pypi_url) as resp:
@@ -131,7 +133,7 @@ async def get_server_config(gms_url: str, token: Optional[str]) -> RestServiceConfig:

 async def get_server_version_stats(
     server: Optional[DataHubGraph] = None,
-) -> Tuple[Optional[str], Optional[Version], Optional[datetime]]:
+) -> Tuple[Optional[str], Optional[Version], Optional[str], Optional[datetime]]:
     import aiohttp

     server_config: Optional[RestServiceConfig] = None
@@ -151,12 +153,13 @@ async def get_server_version_stats(

     server_type = None
     server_version: Optional[Version] = None
+    current_server_default_cli_version = None
     current_server_release_date = None
     if server_config:
         server_version_string = server_config.service_version
         commit_hash = server_config.commit_hash
         server_type = server_config.server_type
-
+        current_server_default_cli_version = server_config.default_cli_version
         if server_type == "quickstart" and commit_hash:
             async with aiohttp.ClientSession(
                 headers={"Accept": "application/vnd.github.v3+json"}
@@ -171,7 +174,12 @@ async def get_server_version_stats(
     if server_version_string and server_version_string.startswith("v"):
         server_version = Version(server_version_string[1:])

-    return (
+    return (
+        server_type,
+        server_version,
+        current_server_default_cli_version,
+        current_server_release_date,
+    )


 def retrieve_version_stats(
@@ -214,6 +222,7 @@ async def _retrieve_version_stats(
     (
         current_server_type,
         current_server_version,
+        current_server_default_cli_version,
         current_server_release_date,
     ) = results[2]

@@ -223,6 +232,14 @@ async def _retrieve_version_stats(
         current=VersionStats(
             version=current_server_version, release_date=current_server_release_date
         ),
+        current_server_default_cli_version=(
+            VersionStats(
+                version=Version(current_server_default_cli_version),
+                release_date=None,
+            )
+            if current_server_default_cli_version
+            else None
+        ),
         latest=(
             VersionStats(version=last_server_version, release_date=last_server_date)
             if last_server_version
@@ -255,21 +272,14 @@ def valid_client_version(version: Version) -> bool:
     """Only version strings like 0.4.5 and 0.6.7.8 are valid. 0.8.6.7rc1 is not"""
     if version.is_prerelease or version.is_postrelease or version.is_devrelease:
         return False
-
-        return True
-
-    return False
+    return True


 def valid_server_version(version: Version) -> bool:
     """Only version strings like 0.8.x, 0.9.x or 0.10.x are valid. 0.1.x is not"""
     if version.is_prerelease or version.is_postrelease or version.is_devrelease:
         return False
-
-    if version.major == 0 and version.minor in [8, 9, 10]:
-        return True
-
-    return False
+    return True
@@ -291,6 +301,27 @@ def is_client_server_compatible(client: VersionStats, server: VersionStats) -> int:
     return server.version.micro - client.version.micro


+def is_server_default_cli_ahead(version_stats: DataHubVersionStats) -> bool:
+    """
+    Check if the server default CLI version is ahead of the current CLI version.
+    Returns True if server default CLI is newer and both versions are valid.
+    """
+    if not version_stats.server.current_server_default_cli_version:
+        return False
+
+    current_cli = version_stats.client.current
+    server_default_cli = version_stats.server.current_server_default_cli_version
+
+    is_valid_client_version = valid_client_version(current_cli.version)
+    is_valid_server_version = valid_client_version(server_default_cli.version)
+
+    if not (is_valid_client_version and is_valid_server_version):
+        return False
+
+    compatibility_result = is_client_server_compatible(current_cli, server_default_cli)
+    return compatibility_result > 0
+
+
 def _maybe_print_upgrade_message(
     version_stats: Optional[DataHubVersionStats],
 ) -> None:
@@ -429,6 +460,8 @@ def check_upgrade_post(


 def check_upgrade(func: Callable[..., T]) -> Callable[..., T]:
+    log.debug(f"Checking upgrade for {func.__module__}.{func.__name__}")
+
     @wraps(func)
     def async_wrapper(*args: Any, **kwargs: Any) -> Any:
         with PerfTimer() as timer:
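The new `is_server_default_cli_ahead` builds on `valid_client_version` and `is_client_server_compatible`, where a positive return value means the server-side version is ahead. A hedged sketch of those semantics using the models above:

```python
from packaging.version import Version

from datahub.upgrade.upgrade import (
    VersionStats,
    is_client_server_compatible,
    valid_client_version,
)

client = VersionStats(version=Version("1.1.0"), release_date=None)
server_default_cli = VersionStats(version=Version("1.2.0"), release_date=None)

assert valid_client_version(client.version)  # pre/post/dev releases are rejected
# Positive result: the server's default CLI version is ahead of this client,
# which is the condition is_server_default_cli_ahead checks for.
assert is_client_server_compatible(client, server_default_cli) > 0
```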
datahub/utilities/server_config_util.py
CHANGED

@@ -183,6 +183,14 @@ class RestServiceConfig:
         managed_ingestion = self.raw_config.get("managedIngestion") or {}
         return managed_ingestion.get("enabled", False)

+    @property
+    def default_cli_version(self) -> Optional[str]:
+        """
+        Get the default CLI version.
+        """
+        managed_ingestion = self.raw_config.get("managedIngestion") or {}
+        return managed_ingestion.get("defaultCliVersion")
+
     @property
     def is_datahub_cloud(self) -> bool:
         """
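A hedged sketch of what the new property reads: the `managedIngestion` section of the server's config payload. Constructing `RestServiceConfig` directly from a dict is shown for illustration and assumes a `raw_config` keyword:

```python
from datahub.utilities.server_config_util import RestServiceConfig

config = RestServiceConfig(
    raw_config={"managedIngestion": {"enabled": True, "defaultCliVersion": "1.2.0"}}
)
assert config.default_cli_version == "1.2.0"
```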
datahub/utilities/sqlalchemy_query_combiner.py
CHANGED

@@ -272,8 +272,11 @@ class SQLAlchemyQueryCombiner:
             self.report.uncombined_queries_issued += 1
             return _sa_execute_underlying_method(conn, query, *args, **kwargs)

-        with
-
+        with (
+            _sa_execute_method_patching_lock,
+            unittest.mock.patch(
+                "sqlalchemy.engine.Connection.execute", _sa_execute_fake
+            ),
         ):
             yield self
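The rewritten block adopts the parenthesized multi-context-manager form (Python 3.10+), combining a lock with a `unittest.mock.patch` in a single `with` statement. A standalone sketch of the same construct:

```python
import os
import threading
import unittest.mock

lock = threading.Lock()

# Acquire the lock and apply the patch in one statement, mirroring the
# combiner's lock-plus-patch pairing above.
with (
    lock,
    unittest.mock.patch("os.getcwd", return_value="/tmp"),
):
    assert os.getcwd() == "/tmp"
```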