acryl-datahub 1.2.0.4rc3__py3-none-any.whl → 1.2.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of acryl-datahub has been flagged as potentially problematic.
Files changed (43)
  1. {acryl_datahub-1.2.0.4rc3.dist-info → acryl_datahub-1.2.0.5.dist-info}/METADATA +2454 -2454
  2. {acryl_datahub-1.2.0.4rc3.dist-info → acryl_datahub-1.2.0.5.dist-info}/RECORD +43 -41
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +1 -1
  5. datahub/api/entities/external/external_entities.py +500 -15
  6. datahub/ingestion/source/aws/glue.py +18 -14
  7. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  8. datahub/ingestion/source/aws/tag_entities.py +82 -104
  9. datahub/ingestion/source/common/subtypes.py +1 -0
  10. datahub/ingestion/source/hex/api.py +2 -0
  11. datahub/ingestion/source/hex/mapper.py +16 -2
  12. datahub/ingestion/source/hex/model.py +2 -0
  13. datahub/ingestion/source/looker/looker_common.py +26 -0
  14. datahub/ingestion/source/mock_data/datahub_mock_data.py +11 -1
  15. datahub/ingestion/source/snowflake/constants.py +1 -0
  16. datahub/ingestion/source/snowflake/snowflake_config.py +8 -1
  17. datahub/ingestion/source/snowflake/snowflake_queries.py +49 -6
  18. datahub/ingestion/source/snowflake/snowflake_query.py +50 -5
  19. datahub/ingestion/source/snowflake/snowflake_schema.py +173 -9
  20. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +25 -2
  21. datahub/ingestion/source/snowflake/snowflake_utils.py +16 -3
  22. datahub/ingestion/source/snowflake/snowflake_v2.py +4 -1
  23. datahub/ingestion/source/sql/mssql/source.py +2 -25
  24. datahub/ingestion/source/sql/mysql.py +54 -0
  25. datahub/ingestion/source/sql/postgres.py +5 -134
  26. datahub/ingestion/source/sql/sql_common.py +137 -0
  27. datahub/ingestion/source/superset.py +140 -56
  28. datahub/ingestion/source/unity/config.py +11 -0
  29. datahub/ingestion/source/unity/connection_test.py +1 -0
  30. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  31. datahub/ingestion/source/unity/proxy.py +20 -6
  32. datahub/ingestion/source/unity/report.py +9 -1
  33. datahub/ingestion/source/unity/source.py +51 -16
  34. datahub/ingestion/source/unity/tag_entities.py +49 -147
  35. datahub/metadata/_internal_schema_classes.py +514 -514
  36. datahub/metadata/_urns/urn_defs.py +1684 -1684
  37. datahub/metadata/schema.avsc +16680 -16281
  38. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  39. datahub/metadata/schemas/Operation.avsc +4 -2
  40. {acryl_datahub-1.2.0.4rc3.dist-info → acryl_datahub-1.2.0.5.dist-info}/WHEEL +0 -0
  41. {acryl_datahub-1.2.0.4rc3.dist-info → acryl_datahub-1.2.0.5.dist-info}/entry_points.txt +0 -0
  42. {acryl_datahub-1.2.0.4rc3.dist-info → acryl_datahub-1.2.0.5.dist-info}/licenses/LICENSE +0 -0
  43. {acryl_datahub-1.2.0.4rc3.dist-info → acryl_datahub-1.2.0.5.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/snowflake/snowflake_query.py
@@ -8,7 +8,7 @@ from datahub.ingestion.source.snowflake.snowflake_config import (
 )
 from datahub.utilities.prefix_batch_builder import PrefixGroup
 
-SHOW_VIEWS_MAX_PAGE_SIZE = 10000
+SHOW_COMMAND_MAX_PAGE_SIZE = 10000
 SHOW_STREAM_MAX_PAGE_SIZE = 10000
 
 
@@ -38,12 +38,23 @@ class SnowflakeQuery:
         SnowflakeObjectDomain.MATERIALIZED_VIEW.capitalize(),
         SnowflakeObjectDomain.ICEBERG_TABLE.capitalize(),
         SnowflakeObjectDomain.STREAM.capitalize(),
+        SnowflakeObjectDomain.DYNAMIC_TABLE.capitalize(),
     }
 
     ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER = "({})".format(
         ",".join(f"'{domain}'" for domain in ACCESS_HISTORY_TABLE_VIEW_DOMAINS)
     )
 
+    # Domains that can be downstream tables in lineage
+    DOWNSTREAM_TABLE_DOMAINS = {
+        SnowflakeObjectDomain.TABLE.capitalize(),
+        SnowflakeObjectDomain.DYNAMIC_TABLE.capitalize(),
+    }
+
+    DOWNSTREAM_TABLE_DOMAINS_FILTER = "({})".format(
+        ",".join(f"'{domain}'" for domain in DOWNSTREAM_TABLE_DOMAINS)
+    )
+
     @staticmethod
     def current_account() -> str:
         return "select CURRENT_ACCOUNT()"
@@ -235,7 +246,7 @@ class SnowflakeQuery:
     @staticmethod
     def show_views_for_database(
         db_name: str,
-        limit: int = SHOW_VIEWS_MAX_PAGE_SIZE,
+        limit: int = SHOW_COMMAND_MAX_PAGE_SIZE,
         view_pagination_marker: Optional[str] = None,
     ) -> str:
         # While there is an information_schema.views view, that only shows the view definition if the role
@@ -244,7 +255,7 @@ class SnowflakeQuery:
 
         # SHOW VIEWS can return a maximum of 10000 rows.
         # https://docs.snowflake.com/en/sql-reference/sql/show-views#usage-notes
-        assert limit <= SHOW_VIEWS_MAX_PAGE_SIZE
+        assert limit <= SHOW_COMMAND_MAX_PAGE_SIZE
 
         # To work around this, we paginate through the results using the FROM clause.
         from_clause = (
@@ -686,7 +697,7 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
             AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
             AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)
             AND upstream_table_domain in {allowed_upstream_table_domains}
-            AND downstream_table_domain = '{SnowflakeObjectDomain.TABLE.capitalize()}'
+            AND downstream_table_domain in {SnowflakeQuery.DOWNSTREAM_TABLE_DOMAINS_FILTER}
             {("AND " + upstream_sql_filter) if upstream_sql_filter else ""}
         ),
         column_upstream_jobs AS (
@@ -843,7 +854,7 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
             AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
             AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)
             AND upstream_table_domain in {allowed_upstream_table_domains}
-            AND downstream_table_domain = '{SnowflakeObjectDomain.TABLE.capitalize()}'
+            AND downstream_table_domain in {SnowflakeQuery.DOWNSTREAM_TABLE_DOMAINS_FILTER}
             {("AND " + upstream_sql_filter) if upstream_sql_filter else ""}
         ),
         table_upstream_jobs_unique AS (
@@ -940,3 +951,37 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
             f"""FROM '{stream_pagination_marker}'""" if stream_pagination_marker else ""
         )
         return f"""SHOW STREAMS IN DATABASE "{db_name}" LIMIT {limit} {from_clause};"""
+
+    @staticmethod
+    def show_dynamic_tables_for_database(
+        db_name: str,
+        limit: int = SHOW_COMMAND_MAX_PAGE_SIZE,
+        dynamic_table_pagination_marker: Optional[str] = None,
+    ) -> str:
+        """Get dynamic table definitions using SHOW DYNAMIC TABLES."""
+        assert limit <= SHOW_COMMAND_MAX_PAGE_SIZE
+
+        from_clause = (
+            f"""FROM '{dynamic_table_pagination_marker}'"""
+            if dynamic_table_pagination_marker
+            else ""
+        )
+        return f"""\
+SHOW DYNAMIC TABLES IN DATABASE "{db_name}"
+LIMIT {limit} {from_clause};
+"""
+
+    @staticmethod
+    def get_dynamic_table_graph_history(db_name: str) -> str:
+        """Get dynamic table dependency information from information schema."""
+        return f"""
+        SELECT
+            name,
+            inputs,
+            target_lag_type,
+            target_lag_sec,
+            scheduling_state,
+            alter_trigger
+        FROM TABLE("{db_name}".INFORMATION_SCHEMA.DYNAMIC_TABLE_GRAPH_HISTORY())
+        ORDER BY name
+        """
datahub/ingestion/source/snowflake/snowflake_schema.py
@@ -3,16 +3,17 @@ import os
 from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Callable, Dict, Iterable, List, MutableMapping, Optional
+from typing import Any, Callable, Dict, Iterable, List, MutableMapping, Optional
 
 from datahub.ingestion.api.report import SupportsAsObj
 from datahub.ingestion.source.common.subtypes import DatasetSubTypes
 from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
 from datahub.ingestion.source.snowflake.snowflake_connection import SnowflakeConnection
 from datahub.ingestion.source.snowflake.snowflake_query import (
-    SHOW_VIEWS_MAX_PAGE_SIZE,
+    SHOW_COMMAND_MAX_PAGE_SIZE,
     SnowflakeQuery,
 )
+from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report
 from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable, BaseView
 from datahub.ingestion.source.sql.stored_procedures.base import BaseProcedure
 from datahub.utilities.file_backed_collections import FileBackedDict
@@ -103,6 +104,17 @@ class SnowflakeTable(BaseTable):
         return DatasetSubTypes.TABLE
 
 
+@dataclass
+class SnowflakeDynamicTable(SnowflakeTable):
+    definition: Optional[str] = (
+        None  # SQL query that defines the dynamic table's content
+    )
+    target_lag: Optional[str] = None  # Refresh frequency (e.g., "1 HOUR", "30 MINUTES")
+
+    def get_subtype(self) -> DatasetSubTypes:
+        return DatasetSubTypes.DYNAMIC_TABLE
+
+
 @dataclass
 class SnowflakeView(BaseView):
     materialized: bool = False
@@ -226,8 +238,11 @@ class _SnowflakeTagCache:
 
 
 class SnowflakeDataDictionary(SupportsAsObj):
-    def __init__(self, connection: SnowflakeConnection) -> None:
+    def __init__(
+        self, connection: SnowflakeConnection, report: SnowflakeV2Report
+    ) -> None:
         self.connection = connection
+        self.report = report
 
     def as_obj(self) -> Dict[str, Dict[str, int]]:
         # TODO: Move this into a proper report type that gets computed.
@@ -355,8 +370,11 @@ class SnowflakeDataDictionary(SupportsAsObj):
             if table["TABLE_SCHEMA"] not in tables:
                 tables[table["TABLE_SCHEMA"]] = []
 
+            is_dynamic = table.get("IS_DYNAMIC", "NO").upper() == "YES"
+            table_cls = SnowflakeDynamicTable if is_dynamic else SnowflakeTable
+
             tables[table["TABLE_SCHEMA"]].append(
-                SnowflakeTable(
+                table_cls(
                     name=table["TABLE_NAME"],
                     type=table["TABLE_TYPE"],
                     created=table["CREATED"],
@@ -365,11 +383,15 @@ class SnowflakeDataDictionary(SupportsAsObj):
                     rows_count=table["ROW_COUNT"],
                     comment=table["COMMENT"],
                     clustering_key=table["CLUSTERING_KEY"],
-                    is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
+                    is_dynamic=is_dynamic,
                     is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
                     is_hybrid=table.get("IS_HYBRID", "NO").upper() == "YES",
                 )
             )
+
+        # Populate dynamic table definitions
+        self.populate_dynamic_table_definitions(tables, db_name)
+
         return tables
 
     def get_tables_for_schema(
@@ -382,8 +404,11 @@ class SnowflakeDataDictionary(SupportsAsObj):
         )
 
         for table in cur:
+            is_dynamic = table.get("IS_DYNAMIC", "NO").upper() == "YES"
+            table_cls = SnowflakeDynamicTable if is_dynamic else SnowflakeTable
+
             tables.append(
-                SnowflakeTable(
+                table_cls(
                     name=table["TABLE_NAME"],
                     type=table["TABLE_TYPE"],
                     created=table["CREATED"],
@@ -392,16 +417,21 @@ class SnowflakeDataDictionary(SupportsAsObj):
                     rows_count=table["ROW_COUNT"],
                     comment=table["COMMENT"],
                     clustering_key=table["CLUSTERING_KEY"],
-                    is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
+                    is_dynamic=is_dynamic,
                     is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
                     is_hybrid=table.get("IS_HYBRID", "NO").upper() == "YES",
                 )
             )
+
+        # Populate dynamic table definitions for just this schema
+        schema_tables = {schema_name: tables}
+        self.populate_dynamic_table_definitions(schema_tables, db_name)
+
         return tables
 
     @serialized_lru_cache(maxsize=1)
     def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]]:
-        page_limit = SHOW_VIEWS_MAX_PAGE_SIZE
+        page_limit = SHOW_COMMAND_MAX_PAGE_SIZE
 
         views: Dict[str, List[SnowflakeView]] = {}
 
@@ -660,7 +690,7 @@ class SnowflakeDataDictionary(SupportsAsObj):
     def get_streams_for_database(
         self, db_name: str
     ) -> Dict[str, List[SnowflakeStream]]:
-        page_limit = SHOW_VIEWS_MAX_PAGE_SIZE
+        page_limit = SHOW_COMMAND_MAX_PAGE_SIZE
 
         streams: Dict[str, List[SnowflakeStream]] = {}
 
@@ -743,3 +773,137 @@ class SnowflakeDataDictionary(SupportsAsObj):
             )
         )
         return procedures
+
+    @serialized_lru_cache(maxsize=1)
+    def get_dynamic_table_graph_info(self, db_name: str) -> Dict[str, Dict[str, Any]]:
+        """Get dynamic table dependency information from information schema."""
+        dt_graph_info: Dict[str, Dict[str, Any]] = {}
+        try:
+            cur = self.connection.query(
+                SnowflakeQuery.get_dynamic_table_graph_history(db_name)
+            )
+            for row in cur:
+                dt_name = row["NAME"]
+                dt_graph_info[dt_name] = {
+                    "inputs": row.get("INPUTS"),
+                    "target_lag_type": row.get("TARGET_LAG_TYPE"),
+                    "target_lag_sec": row.get("TARGET_LAG_SEC"),
+                    "scheduling_state": row.get("SCHEDULING_STATE"),
+                    "alter_trigger": row.get("ALTER_TRIGGER"),
+                }
+            logger.debug(
+                f"Successfully retrieved graph info for {len(dt_graph_info)} dynamic tables in {db_name}"
+            )
+        except Exception as e:
+            self.report.warning(
+                "Failed to get dynamic table graph history",
+                db_name,
+                exc=e,
+            )
+
+        return dt_graph_info
+
+    @serialized_lru_cache(maxsize=1)
+    def get_dynamic_tables_with_definitions(
+        self, db_name: str
+    ) -> Dict[str, List[SnowflakeDynamicTable]]:
+        """Get dynamic tables with their definitions using SHOW DYNAMIC TABLES."""
+        page_limit = SHOW_COMMAND_MAX_PAGE_SIZE
+        dynamic_tables: Dict[str, List[SnowflakeDynamicTable]] = {}
+
+        # Get graph/dependency information (pass db_name)
+        dt_graph_info = self.get_dynamic_table_graph_info(db_name)
+
+        first_iteration = True
+        dt_pagination_marker: Optional[str] = None
+
+        while first_iteration or dt_pagination_marker is not None:
+            try:
+                cur = self.connection.query(
+                    SnowflakeQuery.show_dynamic_tables_for_database(
+                        db_name,
+                        limit=page_limit,
+                        dynamic_table_pagination_marker=dt_pagination_marker,
+                    )
+                )
+
+                first_iteration = False
+                dt_pagination_marker = None
+                result_set_size = 0
+
+                for dt in cur:
+                    result_set_size += 1
+
+                    dt_name = dt["name"]
+                    schema_name = dt["schema_name"]
+
+                    if schema_name not in dynamic_tables:
+                        dynamic_tables[schema_name] = []
+
+                    # Get definition from SHOW result
+                    definition = dt.get("text")
+
+                    # Get target lag from SHOW result or graph info
+                    target_lag = dt.get("target_lag")
+                    if not target_lag and dt_graph_info:
+                        qualified_name = f"{db_name}.{schema_name}.{dt_name}"
+                        graph_info = dt_graph_info.get(qualified_name, {})
+                        if graph_info.get("target_lag_type") and graph_info.get(
+                            "target_lag_sec"
+                        ):
+                            target_lag = f"{graph_info['target_lag_sec']} {graph_info['target_lag_type']}"
+
+                    dynamic_tables[schema_name].append(
+                        SnowflakeDynamicTable(
+                            name=dt_name,
+                            created=dt["created_on"],
+                            last_altered=dt.get("created_on"),
+                            size_in_bytes=dt.get("bytes", 0),
+                            rows_count=dt.get("rows", 0),
+                            comment=dt.get("comment"),
+                            definition=definition,
+                            target_lag=target_lag,
+                            is_dynamic=True,
+                            type="DYNAMIC TABLE",
+                        )
+                    )
+
+                if result_set_size >= page_limit:
+                    logger.info(
+                        f"Fetching next page of dynamic tables for {db_name} - after {dt_name}"
+                    )
+                    dt_pagination_marker = dt_name
+
+            except Exception as e:
+                logger.debug(
+                    f"Failed to get dynamic tables for database {db_name}: {e}"
+                )
+                break
+
+        return dynamic_tables
+
+    def populate_dynamic_table_definitions(
+        self, tables: Dict[str, List[SnowflakeTable]], db_name: str
+    ) -> None:
+        """Populate dynamic table definitions for tables that are marked as dynamic."""
+        try:
+            # Get dynamic tables with definitions from SHOW command
+            dt_with_definitions = self.get_dynamic_tables_with_definitions(db_name)
+
+            for schema_name, table_list in tables.items():
+                for table in table_list:
+                    if (
+                        isinstance(table, SnowflakeDynamicTable)
+                        and table.definition is None
+                    ):
+                        # Find matching dynamic table from SHOW results
+                        show_dt_list = dt_with_definitions.get(schema_name, [])
+                        for show_dt in show_dt_list:
+                            if show_dt.name == table.name:
+                                table.definition = show_dt.definition
+                                table.target_lag = show_dt.target_lag
+                                break
+        except Exception as e:
+            logger.debug(
+                f"Failed to populate dynamic table definitions for {db_name}: {e}"
            )
datahub/ingestion/source/snowflake/snowflake_schema_gen.py
@@ -45,6 +45,7 @@ from datahub.ingestion.source.snowflake.snowflake_schema import (
     SnowflakeColumn,
     SnowflakeDatabase,
     SnowflakeDataDictionary,
+    SnowflakeDynamicTable,
     SnowflakeFK,
     SnowflakePK,
     SnowflakeSchema,
@@ -182,7 +183,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         self.identifiers: SnowflakeIdentifierBuilder = identifiers
 
         self.data_dictionary: SnowflakeDataDictionary = SnowflakeDataDictionary(
-            connection=self.connection
+            connection=self.connection, report=self.report
         )
         self.report.data_dictionary_cache = self.data_dictionary
 
@@ -495,6 +496,22 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         if self.config.include_technical_schema:
             data_reader = self.make_data_reader()
             for table in tables:
+                # Handle dynamic table definitions for lineage
+                if (
+                    isinstance(table, SnowflakeDynamicTable)
+                    and table.definition
+                    and self.aggregator
+                ):
+                    table_identifier = self.identifiers.get_dataset_identifier(
+                        table.name, schema_name, db_name
+                    )
+                    self.aggregator.add_view_definition(
+                        view_urn=self.identifiers.gen_dataset_urn(table_identifier),
+                        view_definition=table.definition,
+                        default_db=db_name,
+                        default_schema=schema_name,
+                    )
+
                 table_wu_generator = self._process_table(
                     table, snowflake_schema, db_name
                 )
@@ -935,6 +952,10 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             }
         )
 
+        if isinstance(table, SnowflakeDynamicTable):
+            if table.target_lag:
+                custom_properties["TARGET_LAG"] = table.target_lag
+
         if isinstance(table, SnowflakeView) and table.is_secure:
             custom_properties["IS_SECURE"] = "true"
 
@@ -980,7 +1001,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             schema_name,
             db_name,
             (
-                SnowflakeObjectDomain.TABLE
+                SnowflakeObjectDomain.DYNAMIC_TABLE
+                if isinstance(table, SnowflakeTable) and table.is_dynamic
+                else SnowflakeObjectDomain.TABLE
                 if isinstance(table, SnowflakeTable)
                 else SnowflakeObjectDomain.VIEW
             ),
datahub/ingestion/source/snowflake/snowflake_utils.py
@@ -93,9 +93,20 @@ class SnowsightUrlBuilder:
         table_name: str,
         schema_name: str,
         db_name: str,
-        domain: Literal[SnowflakeObjectDomain.TABLE, SnowflakeObjectDomain.VIEW],
+        domain: Literal[
+            SnowflakeObjectDomain.TABLE,
+            SnowflakeObjectDomain.VIEW,
+            SnowflakeObjectDomain.DYNAMIC_TABLE,
+        ],
     ) -> Optional[str]:
-        return f"{self.snowsight_base_url}#/data/databases/{db_name}/schemas/{schema_name}/{domain}/{table_name}/"
+        # For dynamic tables, use the dynamic-table domain in the URL path
+        # Ensure only explicitly dynamic tables use dynamic-table URL path
+        url_domain = (
+            "dynamic-table"
+            if domain == SnowflakeObjectDomain.DYNAMIC_TABLE
+            else str(domain)
+        )
+        return f"{self.snowsight_base_url}#/data/databases/{db_name}/schemas/{schema_name}/{url_domain}/{table_name}/"
 
     def get_external_url_for_schema(
         self, schema_name: str, db_name: str
@@ -129,6 +140,7 @@ class SnowflakeFilter:
             SnowflakeObjectDomain.MATERIALIZED_VIEW,
             SnowflakeObjectDomain.ICEBERG_TABLE,
             SnowflakeObjectDomain.STREAM,
+            SnowflakeObjectDomain.DYNAMIC_TABLE,
         ):
             return False
         if _is_sys_table(dataset_name):
@@ -160,7 +172,8 @@ class SnowflakeFilter:
             return False
 
         if dataset_type.lower() in {
-            SnowflakeObjectDomain.TABLE
+            SnowflakeObjectDomain.TABLE,
+            SnowflakeObjectDomain.DYNAMIC_TABLE,
         } and not self.filter_config.table_pattern.allowed(
             _cleanup_qualified_name(dataset_name, self.structured_reporter)
         ):
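The effect of the url_domain mapping above, assuming the enum's string value for dynamic tables is "dynamic table" and using an illustrative base URL (SnowsightUrlBuilder computes the real one per account):

# Sketch of the URL shape produced above; base URL and names are hypothetical.
def external_url(base: str, db: str, schema: str, name: str, domain: str) -> str:
    # Dynamic tables get a "dynamic-table" path segment; other domains
    # pass through unchanged.
    url_domain = "dynamic-table" if domain == "dynamic table" else domain
    return f"{base}#/data/databases/{db}/schemas/{schema}/{url_domain}/{name}/"

base = "https://app.snowflake.com/myorg/myaccount/"  # hypothetical
print(external_url(base, "ANALYTICS", "PUBLIC", "ORDERS", "table"))
# -> .../schemas/PUBLIC/table/ORDERS/
print(external_url(base, "ANALYTICS", "PUBLIC", "ORDERS_DT", "dynamic table"))
# -> .../schemas/PUBLIC/dynamic-table/ORDERS_DT/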
datahub/ingestion/source/snowflake/snowflake_v2.py
@@ -171,7 +171,9 @@ class SnowflakeV2Source(
         )
 
         # For database, schema, tables, views, etc
-        self.data_dictionary = SnowflakeDataDictionary(connection=self.connection)
+        self.data_dictionary = SnowflakeDataDictionary(
+            connection=self.connection, report=self.report
+        )
         self.lineage_extractor: Optional[SnowflakeLineageExtractor] = None
 
         self.discovered_datasets: Optional[List[str]] = None
@@ -600,6 +602,7 @@ class SnowflakeV2Source(
             include_query_usage_statistics=self.config.include_query_usage_statistics,
             user_email_pattern=self.config.user_email_pattern,
             pushdown_deny_usernames=self.config.pushdown_deny_usernames,
+            pushdown_allow_usernames=self.config.pushdown_allow_usernames,
             query_dedup_strategy=self.config.query_dedup_strategy,
             push_down_database_pattern_access_history=self.config.push_down_database_pattern_access_history,
             additional_database_names_allowlist=self.config.additional_database_names_allowlist,
datahub/ingestion/source/sql/mssql/source.py
@@ -1,7 +1,7 @@
 import logging
 import re
 import urllib.parse
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple
 
 import pydantic
 import sqlalchemy.dialects.mssql
@@ -41,7 +41,6 @@ from datahub.ingestion.source.sql.mssql.job_models import (
 )
 from datahub.ingestion.source.sql.sql_common import (
     SQLAlchemySource,
-    SqlWorkUnit,
     register_custom_type,
 )
 from datahub.ingestion.source.sql.sql_config import (
@@ -346,28 +345,6 @@ class SQLServerSource(SQLAlchemySource):
                 exc=e,
             )
 
-    def get_schema_level_workunits(
-        self,
-        inspector: Inspector,
-        schema: str,
-        database: str,
-    ) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
-        yield from super().get_schema_level_workunits(
-            inspector=inspector,
-            schema=schema,
-            database=database,
-        )
-        if self.config.include_stored_procedures:
-            try:
-                yield from self.loop_stored_procedures(inspector, schema, self.config)
-            except Exception as e:
-                self.report.failure(
-                    message="Failed to list stored procedures",
-                    title="SQL Server Stored Procedures Extraction",
-                    context="Error occurred during schema-level stored procedure extraction",
-                    exc=e,
-                )
-
     def _detect_rds_environment(self, conn: Connection) -> bool:
         """
         Detect if we're running in an RDS/managed environment vs on-premises.
@@ -636,7 +613,7 @@ class SQLServerSource(SQLAlchemySource):
         self,
         inspector: Inspector,
         schema: str,
-        sql_config: SQLServerConfig,
+        sql_config: SQLServerConfig,  # type: ignore
     ) -> Iterable[MetadataWorkUnit]:
         """
         Loop schema data for get stored procedures as dataJob-s.
datahub/ingestion/source/sql/mysql.py
@@ -1,5 +1,7 @@
 # This import verifies that the dependencies are available.
 
+from typing import List
+
 import pymysql  # noqa: F401
 from pydantic.fields import Field
 from sqlalchemy import util
@@ -7,6 +9,7 @@ from sqlalchemy.dialects.mysql import BIT, base
 from sqlalchemy.dialects.mysql.enumerated import SET
 from sqlalchemy.engine.reflection import Inspector
 
+from datahub.configuration.common import AllowDenyPattern
 from datahub.ingestion.api.decorators import (
     SourceCapability,
     SupportStatus,
@@ -20,6 +23,9 @@ from datahub.ingestion.source.sql.sql_common import (
     register_custom_type,
 )
 from datahub.ingestion.source.sql.sql_config import SQLAlchemyConnectionConfig
+from datahub.ingestion.source.sql.stored_procedures.base import (
+    BaseProcedure,
+)
 from datahub.ingestion.source.sql.two_tier_sql_source import (
     TwoTierSQLAlchemyConfig,
     TwoTierSQLAlchemySource,
@@ -58,6 +64,17 @@ class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
     def get_identifier(self, *, schema: str, table: str) -> str:
         return f"{schema}.{table}"
 
+    include_stored_procedures: bool = Field(
+        default=True,
+        description="Include ingest of stored procedures.",
+    )
+
+    procedure_pattern: AllowDenyPattern = Field(
+        default=AllowDenyPattern.allow_all(),
+        description="Regex patterns for stored procedures to filter in ingestion. "
+        "Specify regex to match the entire procedure name in database.schema.procedure_name format. e.g. to match all procedures starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'",
+    )
+
 
 @platform_name("MySQL")
 @config_class(MySQLConfig)
@@ -95,3 +112,40 @@ class MySQLSource(TwoTierSQLAlchemySource):
             self.profile_metadata_info.dataset_name_to_storage_bytes[
                 f"{row.TABLE_SCHEMA}.{row.TABLE_NAME}"
             ] = row.DATA_LENGTH
+
+    def get_procedures_for_schema(
+        self, inspector: Inspector, schema: str, db_name: str
+    ) -> List[BaseProcedure]:
+        """
+        Get stored procedures for a specific schema.
+        """
+        base_procedures = []
+        with inspector.engine.connect() as conn:
+            procedures = conn.execute(
+                """
+                SELECT ROUTINE_NAME AS name,
+                       ROUTINE_DEFINITION AS definition,
+                       EXTERNAL_LANGUAGE AS language
+                FROM information_schema.ROUTINES
+                WHERE ROUTINE_TYPE = 'PROCEDURE'
+                AND ROUTINE_SCHEMA = %s
+                """,
+                (schema,),
+            )
+
+            procedure_rows = list(procedures)
+            for row in procedure_rows:
+                base_procedures.append(
+                    BaseProcedure(
+                        name=row.name,
+                        language=row.language,
+                        argument_signature=None,
+                        return_type=None,
+                        procedure_definition=row.definition,
+                        created=None,
+                        last_altered=None,
+                        extra_properties=None,
+                        comment=None,
+                    )
+                )
        return base_procedures
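The new procedure_pattern option filters fully qualified procedure names through datahub's AllowDenyPattern; a small sketch using the regex from the config description above (procedure names are invented):

# Sketch: filtering procedure names the way procedure_pattern is intended
# to work. AllowDenyPattern is the real datahub config class; the names
# below are illustrative.
from datahub.configuration.common import AllowDenyPattern

pattern = AllowDenyPattern(allow=["Customer.public.customer.*"])

for fqn in [
    "Customer.public.customer_refresh",  # matches the allow regex -> True
    "Customer.public.orders_cleanup",    # no allow match -> False
]:
    print(fqn, "->", pattern.allowed(fqn))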