acryl-datahub 1.0.0rc1__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

@@ -1,6 +1,6 @@
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=eCl5OqKrV3CyQ-N-bMuAVTkwd6mFmeQ6QD9j87EU4YM,321
+ datahub/_version.py,sha256=NhFo4lGxW3jCq8mppqC9dZ4lwon5QQbURU6sUwCpKQs,321
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
  datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -332,10 +332,10 @@ datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
  datahub/ingestion/source/kafka/kafka.py,sha256=TX_9MFaecM1ZmwhX3krKsItEmNZX9c2i9024SmVo0io,26572
  datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
  datahub/ingestion/source/kafka_connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/kafka_connect/common.py,sha256=sfAiD48bDFfoXdqYiZuwP5BomJPP5DgaskgAyKkM3GY,7117
- datahub/ingestion/source/kafka_connect/kafka_connect.py,sha256=-ZmPayEYqYJ8rgWIGCkJPQd2z6C8FoZA1XXO3N55KbM,14036
- datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=sbLntDi0c52i8uUJmJ59sAcJeNErSogIJsr2-Zar-3Q,12902
- datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=-rFNXKD8_EFoXuU1CiKF3wHnsBtKCJrcYDwdTno98Xk,21265
+ datahub/ingestion/source/kafka_connect/common.py,sha256=6F9pPD_9uX6RcVLNy2Xpv_ipiqIZaLvsgdrj5o22pfA,7127
+ datahub/ingestion/source/kafka_connect/kafka_connect.py,sha256=AVAgBvgH7kM9I2ke3mwr8CfIL1J2SdVHH_86rnCFwrM,17727
+ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=rNxolagqwQWQmVp4mDr1C-1TB6Drxc2b1dM9JSjNnuA,12905
+ datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=viCqy7fmQl_qyrIkEamRVuUb8_EtfvQjE00CHPi-980,21265
  datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
  datahub/ingestion/source/looker/looker_common.py,sha256=dmcrzEWFxPzZhIeyUYLZuMzhgx7QzvGp4xLTrTYISCA,62136
@@ -442,7 +442,7 @@ datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=e9dCARIQtGB8G1
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
- datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=M-FBoYeiW91-g3gOUpCTj8cKWHH-wqyFtD5UcewfI2k,28121
+ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=R3QxWtdR8T_8YV_3aqt3rJdto1gAij_mEHlSYKqdCfA,28326
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=Ex9FZZzz02cQis4bV3tzd53Pmf8p3AreuWnv9w95pJ0,39642
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=ahea-bwpW6T0iDehGo0Qq_J7wKxPkV61aYHm8bGwDqo,6651
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=GFgcKV5T6VHyNwPBzzw_f8cWA9YFlWug0m6nkLoGXus,25979
@@ -917,12 +917,13 @@ datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,95
  datahub/telemetry/telemetry.py,sha256=sGe3RsrkX1L_jrsRuz5Fd7_9vEY6mHMtkMqR_9_axbo,15025
  datahub/testing/__init__.py,sha256=TywIuzGQvzJsNhI_PGD1RFk11M3RtGl9jIMtAVVHIkg,272
  datahub/testing/check_imports.py,sha256=qs2bk__DeAlsvh-Y9ln9FQfG9DsdIVuSoxkoh4pMmms,2316
- datahub/testing/check_sql_parser_result.py,sha256=f7U7IUSbfV4VACdNI857wPZ9tAZ9j6mXiXmcJNT_RzM,2671
+ datahub/testing/check_sql_parser_result.py,sha256=1RV73w0Q7Jv7XoIz870oaooJFut21hXg72TIBunvdm8,2661
  datahub/testing/check_str_enum.py,sha256=yqk0XXHOGteN-IGqCp5JHy0Kca13BnI09ZqKc4Nwl3E,1187
- datahub/testing/compare_metadata_json.py,sha256=pVJB2qLoKzEJLBXqFT-qGrxpA1y76y-mIbvJf0NnAD0,5274
+ datahub/testing/compare_metadata_json.py,sha256=mTU5evu7KLS3cx8OLOC1fFxj0eY1J1CGV2PEQZmapos,5361
  datahub/testing/docker_utils.py,sha256=g169iy_jNR_mg0p8X31cChZqjOryutAIHUYLq3xqueY,2415
  datahub/testing/doctest.py,sha256=1_8WEhHZ2eRQtw8vsXKzr9L5zzvs0Tcr6q4mnkyyvtw,295
  datahub/testing/mcp_diff.py,sha256=Dxde5uZHqZf1EjOkHm405OHY5PPJp03agZJM9SyR4yE,10717
+ datahub/testing/pytest_hooks.py,sha256=eifmj0M68AIfjTn_-0vtaBkKl75vNKMjsbYX-pJqmGY,1417
  datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/upgrade/upgrade.py,sha256=iDjIDY2YBl2XlKLvb5EMMdYOZ6KraeItgiu9Y4wIM1Q,16666
  datahub/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1012,9 +1013,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.0.0rc1.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
- acryl_datahub-1.0.0rc1.dist-info/METADATA,sha256=Z7QNzSBU3Gur8j0qxxg7MxnbI2CnvVleJLYc3IXohvM,175366
- acryl_datahub-1.0.0rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- acryl_datahub-1.0.0rc1.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
- acryl_datahub-1.0.0rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.0.0rc1.dist-info/RECORD,,
+ acryl_datahub-1.0.0rc2.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.0.0rc2.dist-info/METADATA,sha256=hYE4nG--7qk-ihjYN-kG6QT3NuXbRkR5iFX1N_squ_s,175366
+ acryl_datahub-1.0.0rc2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ acryl_datahub-1.0.0rc2.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
+ acryl_datahub-1.0.0rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.0.0rc2.dist-info/RECORD,,

datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.0.0rc1"
+ __version__ = "1.0.0rc2"


  def is_dev_mode() -> bool:

datahub/ingestion/source/kafka_connect/common.py CHANGED
@@ -110,7 +110,7 @@ class ConnectorManifest:

  name: str
  type: str
- config: Dict
+ config: Dict[str, str]
  tasks: Dict
  url: Optional[str] = None
  flow_property_bag: Optional[Dict[str, str]] = None

datahub/ingestion/source/kafka_connect/kafka_connect.py CHANGED
@@ -1,5 +1,5 @@
  import logging
- from typing import Iterable, List, Optional, Type
+ from typing import Dict, Iterable, List, Optional, Type

  import jpype
  import jpype.imports
@@ -121,7 +121,11 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
  connector_manifest.config, self.config.provided_configs
  )
  connector_manifest.url = connector_url
- connector_manifest.topic_names = self._get_connector_topics(connector_name)
+ connector_manifest.topic_names = self._get_connector_topics(
+     connector_name=connector_name,
+     config=connector_manifest.config,
+     connector_type=connector_manifest.type,
+ )
  connector_class_value = connector_manifest.config.get(CONNECTOR_CLASS) or ""

  class_type: Type[BaseConnector] = BaseConnector
@@ -203,7 +207,9 @@ class KafkaConnectSource(StatefulIngestionSourceBase):

  return response.json()

- def _get_connector_topics(self, connector_name: str) -> List[str]:
+ def _get_connector_topics(
+     self, connector_name: str, config: Dict[str, str], connector_type: str
+ ) -> List[str]:
  try:
  response = self.session.get(
  f"{self.config.connect_uri}/connectors/{connector_name}/topics",
@@ -215,7 +221,21 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
  )
  return []

- return response.json()[connector_name]["topics"]
+ processed_topics = response.json()[connector_name]["topics"]
+
+ if connector_type == SINK:
+     try:
+         return SinkTopicFilter().filter_stale_topics(processed_topics, config)
+     except Exception as e:
+         self.report.warning(
+             title="Error parsing sink connector topics configuration",
+             message="Some stale lineage tasks might show up for connector",
+             context=connector_name,
+             exc=e,
+         )
+         return processed_topics
+ else:
+     return processed_topics

  def construct_flow_workunit(self, connector: ConnectorManifest) -> MetadataWorkUnit:
  connector_name = connector.name
@@ -359,3 +379,76 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
  return builder.make_dataset_urn_with_platform_instance(
  platform, name, platform_instance, self.config.env
  )
+
+
+ class SinkTopicFilter:
+     """Helper class to filter Kafka Connect topics based on configuration."""
+
+     def filter_stale_topics(
+         self,
+         processed_topics: List[str],
+         sink_config: Dict[str, str],
+     ) -> List[str]:
+         """
+         Kafka-connect's /topics API returns the set of topic names the connector has been using
+         since its creation or since the last time its set of active topics was reset. This means-
+         if a topic was ever used by a connector, it will be returned, even if it is no longer used.
+         To remove these stale topics from the list, we double-check the list returned by the API
+         against the sink connector's config.
+         Sink connectors configure exactly one of `topics` or `topics.regex`
+         https://kafka.apache.org/documentation/#sinkconnectorconfigs_topics
+
+         Args:
+             processed_topics: List of topics currently being processed
+             sink_config: Configuration dictionary for the sink connector
+
+         Returns:
+             List of filtered topics that match the configuration
+
+         Raises:
+             ValueError: If sink connector configuration is missing both 'topics' and 'topics.regex' fields
+
+         """
+         # Absence of topics config is a defensive NOOP,
+         # although this should never happen in real world
+         if not self.has_topic_config(sink_config):
+             logger.warning(
+                 f"Found sink without topics config {sink_config.get(CONNECTOR_CLASS)}"
+             )
+             return processed_topics
+
+         # Handle explicit topic list
+         if sink_config.get("topics"):
+             return self._filter_by_topic_list(processed_topics, sink_config["topics"])
+         else:
+             # Handle regex pattern
+             return self._filter_by_topic_regex(
+                 processed_topics, sink_config["topics.regex"]
+             )
+
+     def has_topic_config(self, sink_config: Dict[str, str]) -> bool:
+         """Check if sink config has either topics or topics.regex."""
+         return bool(sink_config.get("topics") or sink_config.get("topics.regex"))
+
+     def _filter_by_topic_list(
+         self, processed_topics: List[str], topics_config: str
+     ) -> List[str]:
+         """Filter topics based on explicit topic list from config."""
+         config_topics = [
+             topic.strip() for topic in topics_config.split(",") if topic.strip()
+         ]
+         return [topic for topic in processed_topics if topic in config_topics]
+
+     def _filter_by_topic_regex(
+         self, processed_topics: List[str], regex_pattern: str
+     ) -> List[str]:
+         """Filter topics based on regex pattern from config."""
+         from java.util.regex import Pattern
+
+         regex_matcher = Pattern.compile(regex_pattern)
+
+         return [
+             topic
+             for topic in processed_topics
+             if regex_matcher.matcher(topic).matches()
+         ]
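
To illustrate the new filtering rule, here is a small standalone sketch of the `topics` branch of SinkTopicFilter.filter_stale_topics. It reimplements only the comma-separated topic-list case in plain Python (the topics.regex branch in the shipped class goes through java.util.regex via jpype and needs a running JVM), so the helper name and sample data below are illustrative, not the packaged code.

from typing import Dict, List

def keep_configured_topics(processed_topics: List[str], sink_config: Dict[str, str]) -> List[str]:
    # Mirror of the `topics` branch: keep only topics still named in the sink config.
    config_topics = [t.strip() for t in sink_config.get("topics", "").split(",") if t.strip()]
    return [topic for topic in processed_topics if topic in config_topics]

# The /topics API remembers every topic the connector ever touched ...
api_topics = ["orders", "orders_v1_deprecated", "payments"]
# ... while the current sink config only reads two of them, so the stale one is dropped.
config = {"topics": "orders, payments"}
assert keep_configured_topics(api_topics, config) == ["orders", "payments"]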

datahub/ingestion/source/kafka_connect/sink_connectors.py CHANGED
@@ -175,7 +175,7 @@ class BigQuerySinkConnector(BaseConnector):
  class BQParser:
  project: str
  target_platform: str
- sanitizeTopics: str
+ sanitizeTopics: bool
  transforms: list
  topicsToTables: Optional[str] = None
  datasets: Optional[str] = None
@@ -187,7 +187,7 @@
  connector_manifest: ConnectorManifest,
  ) -> BQParser:
  project = connector_manifest.config["project"]
- sanitizeTopics = connector_manifest.config.get("sanitizeTopics", "false")
+ sanitizeTopics = connector_manifest.config.get("sanitizeTopics") or "false"
  transform_names = (
  self.connector_manifest.config.get("transforms", "").split(",")
  if self.connector_manifest.config.get("transforms")

datahub/ingestion/source/kafka_connect/source_connectors.py CHANGED
@@ -107,9 +107,9 @@ class ConfluentJDBCSourceConnector(BaseConnector):
  assert database_name
  db_connection_url = f"{url_instance.drivername}://{url_instance.host}:{url_instance.port}/{database_name}"

- topic_prefix = self.connector_manifest.config.get("topic.prefix", None)
+ topic_prefix = self.connector_manifest.config.get("topic.prefix") or ""

- query = self.connector_manifest.config.get("query", None)
+ query = self.connector_manifest.config.get("query") or ""

  transform_names = (
  self.connector_manifest.config.get("transforms", "").split(",")

datahub/ingestion/source/snowflake/snowflake_queries.py CHANGED
@@ -731,6 +731,9 @@ fingerprinted_queries as (
  JOIN filtered_access_history a USING (query_id)
  )
  SELECT * FROM query_access_history
+ -- Our query aggregator expects the queries to be added in chronological order.
+ -- It's easier for us to push down the sorting to Snowflake/SQL instead of doing it in Python.
+ ORDER BY QUERY_START_TIME ASC
  """

datahub/testing/check_sql_parser_result.py CHANGED
@@ -1,5 +1,4 @@
  import logging
- import os
  import pathlib
  from typing import Any, Dict, Optional

@@ -8,11 +7,10 @@ import deepdiff
  from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
  from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
  from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult, sqlglot_lineage
+ from datahub.testing.pytest_hooks import get_golden_settings

  logger = logging.getLogger(__name__)

- UPDATE_FILES = os.environ.get("UPDATE_SQLPARSER_FILES", "false").lower() == "true"
-

  def assert_sql_result_with_resolver(
  sql: str,
@@ -22,6 +20,8 @@ def assert_sql_result_with_resolver(
  allow_table_error: bool = False,
  **kwargs: Any,
  ) -> None:
+ settings = get_golden_settings()
+
  # HACK: Our BigQuery source overwrites this value and doesn't undo it.
  # As such, we need to handle that here.
  BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "_yyyymmdd"
@@ -47,15 +47,14 @@ def assert_sql_result_with_resolver(
  )

  txt = res.json(indent=4)
- if UPDATE_FILES:
+ if settings.update_golden:
  expected_file.write_text(txt)
  return

  if not expected_file.exists():
  expected_file.write_text(txt)
  raise AssertionError(
- f"Expected file {expected_file} does not exist. "
- "Created it with the expected output. Please verify it."
+ f"Missing expected golden file; run with --update-golden-files to create it: {expected_file}"
  )

  expected = SqlParsingResult.parse_raw(expected_file.read_text())

datahub/testing/compare_metadata_json.py CHANGED
@@ -16,6 +16,7 @@ from deepdiff import DeepDiff
  from datahub.ingestion.sink.file import write_metadata_file
  from datahub.ingestion.source.file import read_metadata_file
  from datahub.testing.mcp_diff import CannotCompareMCPs, MCPDiff, get_aspects_by_urn
+ from datahub.testing.pytest_hooks import get_golden_settings

  logger = logging.getLogger(__name__)

@@ -40,26 +41,26 @@ def load_json_file(filename: Union[str, os.PathLike]) -> MetadataJson:
  def assert_metadata_files_equal(
  output_path: Union[str, os.PathLike],
  golden_path: Union[str, os.PathLike],
- update_golden: bool,
- copy_output: bool,
  ignore_paths: Sequence[str] = (),
  ignore_paths_v2: Sequence[str] = (),
  ignore_order: bool = True,
  ) -> None:
+ settings = get_golden_settings()
+
  golden_exists = os.path.isfile(golden_path)

- if copy_output:
+ if settings.copy_output:
  shutil.copyfile(str(output_path), str(golden_path) + ".output")
  logger.info(f"Copied output file to {golden_path}.output")

- if not update_golden and not golden_exists:
+ if not settings.update_golden and not golden_exists:
  raise FileNotFoundError(
  "Golden file does not exist. Please run with the --update-golden-files option to create."
  )

  output = load_json_file(output_path)

- if update_golden and not golden_exists:
+ if settings.update_golden and not golden_exists:
  shutil.copyfile(str(output_path), str(golden_path))
  return
  else:
@@ -87,7 +88,7 @@ def assert_metadata_files_equal(
  ignore_paths = (*ignore_paths, *default_exclude_paths)

  diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order)
- if diff and update_golden:
+ if diff and settings.update_golden:
  if isinstance(diff, MCPDiff) and diff.is_delta_valid:
  logger.info(f"Applying delta to golden file {golden_path}")
  diff.apply_delta(golden)

datahub/testing/pytest_hooks.py ADDED
@@ -0,0 +1,56 @@
+ import dataclasses
+ from typing import Optional
+
+ import pytest
+
+ __all__ = [
+     "load_golden_flags",
+     "get_golden_settings",
+     "pytest_addoption",
+     "GoldenFileSettings",
+ ]
+
+
+ @dataclasses.dataclass
+ class GoldenFileSettings:
+     update_golden: bool
+     copy_output: bool
+
+
+ _registered: bool = False
+ _settings: Optional[GoldenFileSettings] = None
+
+
+ def pytest_addoption(parser: pytest.Parser) -> None:
+     parser.addoption(
+         "--update-golden-files",
+         action="store_true",
+         default=False,
+     )
+
+     # TODO: Deprecate and remove this flag.
+     parser.addoption("--copy-output-files", action="store_true", default=False)
+
+     global _registered
+     _registered = True
+
+
+ @pytest.fixture(scope="session", autouse=True)
+ def load_golden_flags(pytestconfig: pytest.Config) -> None:
+     global _settings
+     _settings = GoldenFileSettings(
+         update_golden=pytestconfig.getoption("--update-golden-files"),
+         copy_output=pytestconfig.getoption("--copy-output-files"),
+     )
+
+
+ def get_golden_settings() -> GoldenFileSettings:
+     if not _registered:
+         raise ValueError(
+             "Golden files aren't set up properly. Call register_golden_flags from a conftest pytest_addoptions method."
+         )
+     if not _settings:
+         raise ValueError(
+             "Golden files aren't set up properly. Ensure load_golden_flags is imported in your conftest."
+         )
+     return _settings
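
The two ValueError messages in get_golden_settings describe the intended wiring: a test suite's conftest.py should re-export pytest_addoption so the new flags are registered, and import the load_golden_flags fixture so the session-scoped fixture is collected and records the flag values. A minimal conftest.py along those lines might look like the following sketch; the file location and suite layout are assumptions, not part of this diff.

# conftest.py (sketch): wire the shared golden-file flags into a test suite.
# Re-exporting pytest_addoption registers --update-golden-files / --copy-output-files,
# and importing the autouse fixture lets pytest collect it for the session.
from datahub.testing.pytest_hooks import load_golden_flags  # noqa: F401
from datahub.testing.pytest_hooks import pytest_addoption  # noqa: F401

With that in place, helpers such as assert_metadata_files_equal and assert_sql_result_with_resolver read the settings themselves, and golden files are refreshed by running pytest with --update-golden-files (optionally --copy-output-files), replacing the UPDATE_SQLPARSER_FILES environment variable that check_sql_parser_result.py previously honored.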