acryl-datahub 1.0.0rc1__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Note: this release has been flagged as potentially problematic.
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/METADATA +2363 -2363
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/RECORD +15 -14
- datahub/_version.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +1 -1
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +3 -0
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/top_level.txt +0 -0

{acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/RECORD CHANGED

@@ -1,6 +1,6 @@
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=
+datahub/_version.py,sha256=NhFo4lGxW3jCq8mppqC9dZ4lwon5QQbURU6sUwCpKQs,321
 datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
 datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -332,10 +332,10 @@ datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 datahub/ingestion/source/kafka/kafka.py,sha256=TX_9MFaecM1ZmwhX3krKsItEmNZX9c2i9024SmVo0io,26572
 datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
 datahub/ingestion/source/kafka_connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/kafka_connect/common.py,sha256=
-datahub/ingestion/source/kafka_connect/kafka_connect.py,sha256
-datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=
-datahub/ingestion/source/kafka_connect/source_connectors.py,sha256
+datahub/ingestion/source/kafka_connect/common.py,sha256=6F9pPD_9uX6RcVLNy2Xpv_ipiqIZaLvsgdrj5o22pfA,7127
+datahub/ingestion/source/kafka_connect/kafka_connect.py,sha256=AVAgBvgH7kM9I2ke3mwr8CfIL1J2SdVHH_86rnCFwrM,17727
+datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=rNxolagqwQWQmVp4mDr1C-1TB6Drxc2b1dM9JSjNnuA,12905
+datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=viCqy7fmQl_qyrIkEamRVuUb8_EtfvQjE00CHPi-980,21265
 datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
 datahub/ingestion/source/looker/looker_common.py,sha256=dmcrzEWFxPzZhIeyUYLZuMzhgx7QzvGp4xLTrTYISCA,62136
@@ -442,7 +442,7 @@ datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=e9dCARIQtGB8G1
 datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
 datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
 datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
-datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=
+datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=R3QxWtdR8T_8YV_3aqt3rJdto1gAij_mEHlSYKqdCfA,28326
 datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
 datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
 datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=GFgcKV5T6VHyNwPBzzw_f8cWA9YFlWug0m6nkLoGXus,25979
@@ -917,12 +917,13 @@ datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,95
 datahub/telemetry/telemetry.py,sha256=sGe3RsrkX1L_jrsRuz5Fd7_9vEY6mHMtkMqR_9_axbo,15025
 datahub/testing/__init__.py,sha256=TywIuzGQvzJsNhI_PGD1RFk11M3RtGl9jIMtAVVHIkg,272
 datahub/testing/check_imports.py,sha256=qs2bk__DeAlsvh-Y9ln9FQfG9DsdIVuSoxkoh4pMmms,2316
-datahub/testing/check_sql_parser_result.py,sha256=
+datahub/testing/check_sql_parser_result.py,sha256=1RV73w0Q7Jv7XoIz870oaooJFut21hXg72TIBunvdm8,2661
 datahub/testing/check_str_enum.py,sha256=yqk0XXHOGteN-IGqCp5JHy0Kca13BnI09ZqKc4Nwl3E,1187
-datahub/testing/compare_metadata_json.py,sha256=
+datahub/testing/compare_metadata_json.py,sha256=mTU5evu7KLS3cx8OLOC1fFxj0eY1J1CGV2PEQZmapos,5361
 datahub/testing/docker_utils.py,sha256=g169iy_jNR_mg0p8X31cChZqjOryutAIHUYLq3xqueY,2415
 datahub/testing/doctest.py,sha256=1_8WEhHZ2eRQtw8vsXKzr9L5zzvs0Tcr6q4mnkyyvtw,295
 datahub/testing/mcp_diff.py,sha256=Dxde5uZHqZf1EjOkHm405OHY5PPJp03agZJM9SyR4yE,10717
+datahub/testing/pytest_hooks.py,sha256=eifmj0M68AIfjTn_-0vtaBkKl75vNKMjsbYX-pJqmGY,1417
 datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/upgrade/upgrade.py,sha256=iDjIDY2YBl2XlKLvb5EMMdYOZ6KraeItgiu9Y4wIM1Q,16666
 datahub/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1012,9 +1013,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-1.0.
-acryl_datahub-1.0.
-acryl_datahub-1.0.
-acryl_datahub-1.0.
-acryl_datahub-1.0.
-acryl_datahub-1.0.
+acryl_datahub-1.0.0rc2.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+acryl_datahub-1.0.0rc2.dist-info/METADATA,sha256=hYE4nG--7qk-ihjYN-kG6QT3NuXbRkR5iFX1N_squ_s,175366
+acryl_datahub-1.0.0rc2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+acryl_datahub-1.0.0rc2.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
+acryl_datahub-1.0.0rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-1.0.0rc2.dist-info/RECORD,,

datahub/_version.py CHANGED

datahub/ingestion/source/kafka_connect/kafka_connect.py CHANGED

@@ -1,5 +1,5 @@
 import logging
-from typing import Iterable, List, Optional, Type
+from typing import Dict, Iterable, List, Optional, Type
 
 import jpype
 import jpype.imports
@@ -121,7 +121,11 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
                 connector_manifest.config, self.config.provided_configs
             )
             connector_manifest.url = connector_url
-            connector_manifest.topic_names = self._get_connector_topics(
+            connector_manifest.topic_names = self._get_connector_topics(
+                connector_name=connector_name,
+                config=connector_manifest.config,
+                connector_type=connector_manifest.type,
+            )
             connector_class_value = connector_manifest.config.get(CONNECTOR_CLASS) or ""
 
             class_type: Type[BaseConnector] = BaseConnector
@@ -203,7 +207,9 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
 
         return response.json()
 
-    def _get_connector_topics(
+    def _get_connector_topics(
+        self, connector_name: str, config: Dict[str, str], connector_type: str
+    ) -> List[str]:
        try:
            response = self.session.get(
                f"{self.config.connect_uri}/connectors/{connector_name}/topics",
@@ -215,7 +221,21 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
             )
             return []
 
-
+        processed_topics = response.json()[connector_name]["topics"]
+
+        if connector_type == SINK:
+            try:
+                return SinkTopicFilter().filter_stale_topics(processed_topics, config)
+            except Exception as e:
+                self.report.warning(
+                    title="Error parsing sink conector topics configuration",
+                    message="Some stale lineage tasks might show up for connector",
+                    context=connector_name,
+                    exc=e,
+                )
+                return processed_topics
+        else:
+            return processed_topics
 
     def construct_flow_workunit(self, connector: ConnectorManifest) -> MetadataWorkUnit:
         connector_name = connector.name
@@ -359,3 +379,76 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
         return builder.make_dataset_urn_with_platform_instance(
             platform, name, platform_instance, self.config.env
         )
+
+
+class SinkTopicFilter:
+    """Helper class to filter Kafka Connect topics based on configuration."""
+
+    def filter_stale_topics(
+        self,
+        processed_topics: List[str],
+        sink_config: Dict[str, str],
+    ) -> List[str]:
+        """
+        Kafka-connect's /topics API returns the set of topic names the connector has been using
+        since its creation or since the last time its set of active topics was reset. This means-
+        if a topic was ever used by a connector, it will be returned, even if it is no longer used.
+        To remove these stale topics from the list, we double-check the list returned by the API
+        against the sink connector's config.
+        Sink connectors configure exactly one of `topics` or `topics.regex`
+        https://kafka.apache.org/documentation/#sinkconnectorconfigs_topics
+
+        Args:
+            processed_topics: List of topics currently being processed
+            sink_config: Configuration dictionary for the sink connector
+
+        Returns:
+            List of filtered topics that match the configuration
+
+        Raises:
+            ValueError: If sink connector configuration is missing both 'topics' and 'topics.regex' fields
+
+        """
+        # Absence of topics config is a defensive NOOP,
+        # although this should never happen in real world
+        if not self.has_topic_config(sink_config):
+            logger.warning(
+                f"Found sink without topics config {sink_config.get(CONNECTOR_CLASS)}"
+            )
+            return processed_topics
+
+        # Handle explicit topic list
+        if sink_config.get("topics"):
+            return self._filter_by_topic_list(processed_topics, sink_config["topics"])
+        else:
+            # Handle regex pattern
+            return self._filter_by_topic_regex(
+                processed_topics, sink_config["topics.regex"]
+            )
+
+    def has_topic_config(self, sink_config: Dict[str, str]) -> bool:
+        """Check if sink config has either topics or topics.regex."""
+        return bool(sink_config.get("topics") or sink_config.get("topics.regex"))
+
+    def _filter_by_topic_list(
+        self, processed_topics: List[str], topics_config: str
+    ) -> List[str]:
+        """Filter topics based on explicit topic list from config."""
+        config_topics = [
+            topic.strip() for topic in topics_config.split(",") if topic.strip()
+        ]
+        return [topic for topic in processed_topics if topic in config_topics]
+
+    def _filter_by_topic_regex(
+        self, processed_topics: List[str], regex_pattern: str
+    ) -> List[str]:
+        """Filter topics based on regex pattern from config."""
+        from java.util.regex import Pattern
+
+        regex_matcher = Pattern.compile(regex_pattern)
+
+        return [
+            topic
+            for topic in processed_topics
+            if regex_matcher.matcher(topic).matches()
+        ]
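
For orientation, here is a minimal, self-contained sketch of the filtering behavior that the new SinkTopicFilter implements. It re-creates the same logic in plain Python, using the standard re module instead of the java.util.regex matcher that the shipped class loads through jpype; the topic names and config values are made up for illustration and are not part of the package:

import re
from typing import Dict, List


def filter_stale_topics_sketch(
    processed_topics: List[str], sink_config: Dict[str, str]
) -> List[str]:
    # Keep only the topics that the sink connector's config still selects.
    if sink_config.get("topics"):
        # Explicit comma-separated topic list.
        allowed = {t.strip() for t in sink_config["topics"].split(",") if t.strip()}
        return [t for t in processed_topics if t in allowed]
    if sink_config.get("topics.regex"):
        # Full-match semantics, mirroring java.util.regex Matcher.matches().
        pattern = re.compile(sink_config["topics.regex"])
        return [t for t in processed_topics if pattern.fullmatch(t)]
    # Defensive no-op when neither option is configured.
    return processed_topics


# Hypothetical example: "orders-v1" was consumed in the past but is no longer
# configured, so it is treated as stale and dropped.
print(filter_stale_topics_sketch(
    ["orders-v1", "orders-v2", "payments"],
    {"topics": "orders-v2, payments"},
))  # ['orders-v2', 'payments']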

datahub/ingestion/source/kafka_connect/sink_connectors.py CHANGED

@@ -175,7 +175,7 @@ class BigQuerySinkConnector(BaseConnector):
     class BQParser:
         project: str
         target_platform: str
-        sanitizeTopics:
+        sanitizeTopics: bool
         transforms: list
         topicsToTables: Optional[str] = None
         datasets: Optional[str] = None
@@ -187,7 +187,7 @@ class BigQuerySinkConnector(BaseConnector):
         connector_manifest: ConnectorManifest,
     ) -> BQParser:
         project = connector_manifest.config["project"]
-        sanitizeTopics = connector_manifest.config.get("sanitizeTopics"
+        sanitizeTopics = connector_manifest.config.get("sanitizeTopics") or "false"
         transform_names = (
             self.connector_manifest.config.get("transforms", "").split(",")
             if self.connector_manifest.config.get("transforms")

datahub/ingestion/source/kafka_connect/source_connectors.py CHANGED

@@ -107,9 +107,9 @@ class ConfluentJDBCSourceConnector(BaseConnector):
         assert database_name
         db_connection_url = f"{url_instance.drivername}://{url_instance.host}:{url_instance.port}/{database_name}"
 
-        topic_prefix = self.connector_manifest.config.get("topic.prefix"
+        topic_prefix = self.connector_manifest.config.get("topic.prefix") or ""
 
-        query = self.connector_manifest.config.get("query"
+        query = self.connector_manifest.config.get("query") or ""
 
         transform_names = (
             self.connector_manifest.config.get("transforms", "").split(",")
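
Both connector parsers above switch to the `config.get(key) or default` idiom (the previous right-hand sides are truncated in this diff, so the exact old form is not visible here). The practical difference from passing a default to dict.get is that `or` also replaces values that are present but empty or None. A small illustrative sketch with an assumed config dict:

# Assumed example: the keys exist in the connector config but carry empty/None values.
config = {"topic.prefix": None, "query": ""}

print(config.get("topic.prefix", ""))    # None  (the default only applies when the key is absent)
print(config.get("query", "fallback"))   # ''    (the stored empty string wins over the default)

print(config.get("topic.prefix") or "")  # ''    (a falsy stored value is coerced to the fallback)
print(config.get("query") or "")         # ''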

datahub/ingestion/source/snowflake/snowflake_queries.py CHANGED

@@ -731,6 +731,9 @@ fingerprinted_queries as (
     JOIN filtered_access_history a USING (query_id)
 )
 SELECT * FROM query_access_history
+-- Our query aggregator expects the queries to be added in chronological order.
+-- It's easier for us to push down the sorting to Snowflake/SQL instead of doing it in Python.
+ORDER BY QUERY_START_TIME ASC
 """
 
 

datahub/testing/check_sql_parser_result.py CHANGED

@@ -1,5 +1,4 @@
 import logging
-import os
 import pathlib
 from typing import Any, Dict, Optional
 
@@ -8,11 +7,10 @@ import deepdiff
 from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
 from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
 from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult, sqlglot_lineage
+from datahub.testing.pytest_hooks import get_golden_settings
 
 logger = logging.getLogger(__name__)
 
-UPDATE_FILES = os.environ.get("UPDATE_SQLPARSER_FILES", "false").lower() == "true"
-
 
 def assert_sql_result_with_resolver(
     sql: str,
@@ -22,6 +20,8 @@ def assert_sql_result_with_resolver(
     allow_table_error: bool = False,
     **kwargs: Any,
 ) -> None:
+    settings = get_golden_settings()
+
     # HACK: Our BigQuery source overwrites this value and doesn't undo it.
     # As such, we need to handle that here.
     BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "_yyyymmdd"
@@ -47,15 +47,14 @@ def assert_sql_result_with_resolver(
     )
 
     txt = res.json(indent=4)
-    if
+    if settings.update_golden:
         expected_file.write_text(txt)
         return
 
     if not expected_file.exists():
         expected_file.write_text(txt)
         raise AssertionError(
-            f"
-            "Created it with the expected output. Please verify it."
+            f"Missing expected golden file; run with --update-golden-files to create it: {expected_file}"
         )
 
     expected = SqlParsingResult.parse_raw(expected_file.read_text())

datahub/testing/compare_metadata_json.py CHANGED

@@ -16,6 +16,7 @@ from deepdiff import DeepDiff
 from datahub.ingestion.sink.file import write_metadata_file
 from datahub.ingestion.source.file import read_metadata_file
 from datahub.testing.mcp_diff import CannotCompareMCPs, MCPDiff, get_aspects_by_urn
+from datahub.testing.pytest_hooks import get_golden_settings
 
 logger = logging.getLogger(__name__)
 
@@ -40,26 +41,26 @@ def load_json_file(filename: Union[str, os.PathLike]) -> MetadataJson:
 def assert_metadata_files_equal(
     output_path: Union[str, os.PathLike],
     golden_path: Union[str, os.PathLike],
-    update_golden: bool,
-    copy_output: bool,
     ignore_paths: Sequence[str] = (),
     ignore_paths_v2: Sequence[str] = (),
     ignore_order: bool = True,
 ) -> None:
+    settings = get_golden_settings()
+
     golden_exists = os.path.isfile(golden_path)
 
-    if copy_output:
+    if settings.copy_output:
         shutil.copyfile(str(output_path), str(golden_path) + ".output")
         logger.info(f"Copied output file to {golden_path}.output")
 
-    if not update_golden and not golden_exists:
+    if not settings.update_golden and not golden_exists:
         raise FileNotFoundError(
             "Golden file does not exist. Please run with the --update-golden-files option to create."
         )
 
     output = load_json_file(output_path)
 
-    if update_golden and not golden_exists:
+    if settings.update_golden and not golden_exists:
         shutil.copyfile(str(output_path), str(golden_path))
         return
     else:
@@ -87,7 +88,7 @@ def assert_metadata_files_equal(
     ignore_paths = (*ignore_paths, *default_exclude_paths)
 
     diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order)
-    if diff and update_golden:
+    if diff and settings.update_golden:
         if isinstance(diff, MCPDiff) and diff.is_delta_valid:
             logger.info(f"Applying delta to golden file {golden_path}")
             diff.apply_delta(golden)
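
With this change, assert_metadata_files_equal no longer takes update_golden or copy_output arguments; those flags come from the pytest options registered in the new datahub/testing/pytest_hooks.py module shown below. A minimal call sketch against the new signature, with placeholder paths, assuming it runs inside a pytest session where those hooks are wired up:

from datahub.testing.compare_metadata_json import assert_metadata_files_equal

# Paths are illustrative placeholders. Whether the golden file is updated, or the
# output is copied alongside it, is now controlled by the --update-golden-files
# and --copy-output-files pytest options instead of function arguments.
assert_metadata_files_equal(
    output_path="my_source_mces.json",
    golden_path="my_source_mces_golden.json",
)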

datahub/testing/pytest_hooks.py ADDED

@@ -0,0 +1,56 @@
+import dataclasses
+from typing import Optional
+
+import pytest
+
+__all__ = [
+    "load_golden_flags",
+    "get_golden_settings",
+    "pytest_addoption",
+    "GoldenFileSettings",
+]
+
+
+@dataclasses.dataclass
+class GoldenFileSettings:
+    update_golden: bool
+    copy_output: bool
+
+
+_registered: bool = False
+_settings: Optional[GoldenFileSettings] = None
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    parser.addoption(
+        "--update-golden-files",
+        action="store_true",
+        default=False,
+    )
+
+    # TODO: Deprecate and remove this flag.
+    parser.addoption("--copy-output-files", action="store_true", default=False)
+
+    global _registered
+    _registered = True
+
+
+@pytest.fixture(scope="session", autouse=True)
+def load_golden_flags(pytestconfig: pytest.Config) -> None:
+    global _settings
+    _settings = GoldenFileSettings(
+        update_golden=pytestconfig.getoption("--update-golden-files"),
+        copy_output=pytestconfig.getoption("--copy-output-files"),
+    )
+
+
+def get_golden_settings() -> GoldenFileSettings:
+    if not _registered:
+        raise ValueError(
+            "Golden files aren't set up properly. Call register_golden_flags from a conftest pytest_addoptions method."
+        )
+    if not _settings:
+        raise ValueError(
+            "Golden files aren't set up properly. Ensure load_golden_flags is imported in your conftest."
+        )
+    return _settings
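
The error messages in get_golden_settings spell out the intended contract: a test suite wires this module in through its conftest.py. A sketch of that wiring, assuming a root-level conftest.py; re-exporting pytest_addoption registers the two options, and re-exporting the autouse session fixture load_golden_flags populates the settings before tests run:

# conftest.py (sketch)
# Re-export the option hook and the session fixture so pytest picks them up.
from datahub.testing.pytest_hooks import (  # noqa: F401
    load_golden_flags,
    pytest_addoption,
)

Golden files can then be regenerated by running pytest with --update-golden-files, while --copy-output-files copies the produced output next to the golden file as <golden>.output.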

{acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/LICENSE: File without changes
{acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/WHEEL: File without changes
{acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/entry_points.txt: File without changes
{acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc2.dist-info}/top_level.txt: File without changes