acryl-datahub 1.2.0.4rc4__py3-none-any.whl → 1.2.0.5rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (38)
  1. {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/METADATA +2410 -2410
  2. {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/RECORD +38 -36
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +1 -1
  5. datahub/api/entities/external/external_entities.py +500 -15
  6. datahub/ingestion/source/aws/glue.py +18 -14
  7. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  8. datahub/ingestion/source/aws/tag_entities.py +82 -104
  9. datahub/ingestion/source/common/subtypes.py +1 -0
  10. datahub/ingestion/source/hex/api.py +2 -0
  11. datahub/ingestion/source/hex/mapper.py +16 -2
  12. datahub/ingestion/source/hex/model.py +2 -0
  13. datahub/ingestion/source/looker/looker_common.py +26 -0
  14. datahub/ingestion/source/snowflake/constants.py +1 -0
  15. datahub/ingestion/source/snowflake/snowflake_query.py +50 -5
  16. datahub/ingestion/source/snowflake/snowflake_schema.py +173 -9
  17. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +25 -2
  18. datahub/ingestion/source/snowflake/snowflake_utils.py +16 -3
  19. datahub/ingestion/source/snowflake/snowflake_v2.py +3 -1
  20. datahub/ingestion/source/sql/mssql/source.py +2 -25
  21. datahub/ingestion/source/sql/mysql.py +54 -0
  22. datahub/ingestion/source/sql/postgres.py +5 -134
  23. datahub/ingestion/source/sql/sql_common.py +137 -0
  24. datahub/ingestion/source/superset.py +140 -56
  25. datahub/ingestion/source/unity/config.py +11 -0
  26. datahub/ingestion/source/unity/connection_test.py +1 -0
  27. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  28. datahub/ingestion/source/unity/proxy.py +20 -6
  29. datahub/ingestion/source/unity/report.py +9 -1
  30. datahub/ingestion/source/unity/source.py +51 -16
  31. datahub/ingestion/source/unity/tag_entities.py +49 -147
  32. datahub/metadata/_internal_schema_classes.py +1 -1
  33. datahub/metadata/schema.avsc +4 -2
  34. datahub/metadata/schemas/Operation.avsc +4 -2
  35. {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/WHEEL +0 -0
  36. {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/entry_points.txt +0 -0
  37. {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/licenses/LICENSE +0 -0
  38. {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/aws/tag_entities.py

@@ -1,5 +1,10 @@
  import logging
- from typing import List, Optional
+ from typing import TYPE_CHECKING, List, Optional
+
+ if TYPE_CHECKING:
+     from datahub.ingestion.source.aws.platform_resource_repository import (
+         GluePlatformResourceRepository,
+     )
 
  from pydantic import BaseModel
 
@@ -7,7 +12,6 @@ from datahub.api.entities.external.external_entities import (
      ExternalEntity,
      ExternalEntityId,
      LinkedResourceSet,
-     PlatformResourceRepository,
  )
  from datahub.api.entities.external.lake_formation_external_entites import (
      LakeFormationTag,
@@ -15,10 +19,8 @@ from datahub.api.entities.external.lake_formation_external_entites import (
  from datahub.api.entities.platformresource.platform_resource import (
      PlatformResource,
      PlatformResourceKey,
-     PlatformResourceSearchFields,
  )
  from datahub.metadata.urns import TagUrn
- from datahub.utilities.search_utils import ElasticDocumentQuery
  from datahub.utilities.urns.urn import Urn
 
  logger = logging.getLogger(__name__)
@@ -29,8 +31,12 @@ class LakeFormationTagSyncContext(BaseModel):
      platform_instance: Optional[str] = None
      catalog: Optional[str] = None
 
+     # Making it compatible with SyncContext interface
+     def get_platform_instance(self) -> Optional[str]:
+         return self.platform_instance
+
 
- class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId):
+ class LakeFormationTagPlatformResourceId(ExternalEntityId):
      """
      A LakeFormationTag is a unique identifier for a Lakeformation tag.
      """
@@ -42,9 +48,6 @@ class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId):
      exists_in_lake_formation: bool = False
      persisted: bool = False
 
-     def __hash__(self) -> int:
-         return hash(self.to_platform_resource_key().id)
-
      # this is a hack to make sure the property is a string and not private pydantic field
      @staticmethod
      def _RESOURCE_TYPE() -> str:
@@ -61,24 +64,26 @@ class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId):
          )
 
      @classmethod
-     def from_tag(
+     def get_or_create_from_tag(
          cls,
          tag: LakeFormationTag,
-         platform_instance: Optional[str],
-         platform_resource_repository: PlatformResourceRepository,
-         catalog: Optional[str] = None,
+         platform_resource_repository: "GluePlatformResourceRepository",
          exists_in_lake_formation: bool = False,
+         catalog_id: Optional[str] = None,
      ) -> "LakeFormationTagPlatformResourceId":
          """
          Creates a LakeFormationTagPlatformResourceId from a LakeFormationTag.
          """
 
+         # Use catalog_id if provided, otherwise fall back to repository catalog
+         effective_catalog = catalog_id or platform_resource_repository.catalog
+
          existing_platform_resource = cls.search_by_urn(
              tag.to_datahub_tag_urn().urn(),
              platform_resource_repository=platform_resource_repository,
              tag_sync_context=LakeFormationTagSyncContext(
-                 platform_instance=platform_instance,
-                 catalog=catalog,
+                 platform_instance=platform_resource_repository.platform_instance,
+                 catalog=effective_catalog,
              ),
          )
          if existing_platform_resource:
@@ -90,9 +95,9 @@ class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId):
          return LakeFormationTagPlatformResourceId(
              tag_key=str(tag.key),
              tag_value=str(tag.value) if tag.value is not None else None,
-             platform_instance=platform_instance,
+             platform_instance=platform_resource_repository.platform_instance,
+             catalog=effective_catalog,
              exists_in_lake_formation=exists_in_lake_formation,
-             catalog=catalog,
              persisted=False,
          )
 
@@ -100,64 +105,48 @@ class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId):
      def search_by_urn(
          cls,
          urn: str,
-         platform_resource_repository: PlatformResourceRepository,
+         platform_resource_repository: "GluePlatformResourceRepository",
          tag_sync_context: LakeFormationTagSyncContext,
      ) -> Optional["LakeFormationTagPlatformResourceId"]:
-         mapped_tags = [
-             t
-             for t in platform_resource_repository.search_by_filter(
-                 ElasticDocumentQuery.create_from(
-                     (
-                         PlatformResourceSearchFields.RESOURCE_TYPE,
-                         str(LakeFormationTagPlatformResourceId._RESOURCE_TYPE()),
-                     ),
-                     (PlatformResourceSearchFields.SECONDARY_KEYS, urn),
+         """
+         Search for existing Lake Formation tag entity by URN using repository caching.
+
+         This method now delegates to the repository's search_entity_by_urn method to ensure
+         consistent caching behavior across all platform implementations.
+         """
+         # Use repository's cached search method instead of duplicating search logic
+         existing_entity_id = platform_resource_repository.search_entity_by_urn(urn)
+
+         if existing_entity_id:
+             # Verify platform instance and catalog match
+             if (
+                 existing_entity_id.platform_instance
+                 == tag_sync_context.platform_instance
+                 and existing_entity_id.catalog == tag_sync_context.catalog
+             ):
+                 logger.info(
+                     f"Found existing LakeFormationTagPlatformResourceId for URN {urn}: {existing_entity_id}"
                  )
-             )
-         ]
+                 # Create a new ID with the correct state instead of mutating
+                 return LakeFormationTagPlatformResourceId(
+                     tag_key=existing_entity_id.tag_key,
+                     tag_value=existing_entity_id.tag_value,
+                     platform_instance=existing_entity_id.platform_instance,
+                     catalog=existing_entity_id.catalog,
+                     exists_in_lake_formation=True, # This tag exists in Lake Formation
+                     persisted=True, # And it's persisted in DataHub
+                 )
+
          logger.info(
-             f"Found {len(mapped_tags)} mapped tags for URN {urn}. {mapped_tags}"
+             f"No mapped tag found for URN {urn} with platform instance {tag_sync_context.platform_instance}. Creating a new LakeFormationTagPlatformResourceId."
          )
-         if len(mapped_tags) > 0:
-             for platform_resource in mapped_tags:
-                 if (
-                     platform_resource.resource_info
-                     and platform_resource.resource_info.value
-                 ):
-                     lake_formation_tag_platform_resource = (
-                         LakeFormationTagPlatformResource(
-                             **platform_resource.resource_info.value.as_pydantic_object(
-                                 LakeFormationTagPlatformResource
-                             ).dict()
-                         )
-                     )
-                     if (
-                         lake_formation_tag_platform_resource.id.platform_instance
-                         == tag_sync_context.platform_instance
-                         and lake_formation_tag_platform_resource.id.catalog
-                         == tag_sync_context.catalog
-                     ):
-                         lake_formation_tag_id = lake_formation_tag_platform_resource.id
-                         lake_formation_tag_id.exists_in_lake_formation = True
-                         lake_formation_tag_id.persisted = True
-                         return lake_formation_tag_id
-                 else:
-                     logger.warning(
-                         f"Platform resource {platform_resource} does not have a resource_info value"
-                     )
-                     continue
-
-         # If we reach here, it means we did not find a mapped tag for the URN
-         logger.info(
-             f"No mapped tag found for URN {urn} with platform instance {tag_sync_context.platform_instance}. Creating a new LakeFormationTagPlatformResourceId."
-         )
          return None
 
      @classmethod
      def from_datahub_urn(
          cls,
          urn: str,
-         platform_resource_repository: PlatformResourceRepository,
+         platform_resource_repository: "GluePlatformResourceRepository",
          tag_sync_context: LakeFormationTagSyncContext,
      ) -> "LakeFormationTagPlatformResourceId":
          """
@@ -188,11 +177,17 @@ class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId):
              logger.info(
                  f"Tag {new_tag_id} already exists in platform resource repository with {resource_key}"
              )
-             new_tag_id.exists_in_lake_formation = (
-                 True # TODO: Check if this is a safe assumption
+             # Create a new ID with the correct state instead of mutating
+             return LakeFormationTagPlatformResourceId(
+                 tag_key=new_tag_id.tag_key,
+                 tag_value=new_tag_id.tag_value,
+                 platform_instance=new_tag_id.platform_instance,
+                 catalog=new_tag_id.catalog,
+                 exists_in_lake_formation=True, # This tag exists in Lake Formation
+                 persisted=new_tag_id.persisted,
              )
              return new_tag_id
-         raise ValueError(f"Unable to create SnowflakeTagId from DataHub URN: {urn}")
+         raise ValueError(f"Unable to create LakeFormationTagId from DataHub URN: {urn}")
 
      @classmethod
      def generate_tag_id(
@@ -223,7 +218,7 @@ class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId):
          )
 
 
- class LakeFormationTagPlatformResource(BaseModel, ExternalEntity):
+ class LakeFormationTagPlatformResource(ExternalEntity):
      datahub_urns: LinkedResourceSet
      managed_by_datahub: bool
      id: LakeFormationTagPlatformResourceId
@@ -246,46 +241,29 @@ class LakeFormationTagPlatformResource(BaseModel, ExternalEntity):
          )
 
      @classmethod
-     def get_from_datahub(
+     def create_default(
          cls,
-         lake_formation_tag_id: LakeFormationTagPlatformResourceId,
-         platform_resource_repository: PlatformResourceRepository,
-         managed_by_datahub: bool = False,
+         entity_id: ExternalEntityId,
+         managed_by_datahub: bool,
      ) -> "LakeFormationTagPlatformResource":
-         # Search for linked DataHub URNs
-         platform_resources = [
-             r
-             for r in platform_resource_repository.search_by_filter(
-                 ElasticDocumentQuery.create_from(
-                     (
-                         PlatformResourceSearchFields.RESOURCE_TYPE,
-                         str(LakeFormationTagPlatformResourceId._RESOURCE_TYPE()),
-                     ),
-                     (
-                         PlatformResourceSearchFields.PRIMARY_KEY,
-                         f"{lake_formation_tag_id.tag_key}/{lake_formation_tag_id.tag_value}",
-                     ),
-                 )
-             )
-         ]
-         for platform_resource in platform_resources:
-             if (
-                 platform_resource.resource_info
-                 and platform_resource.resource_info.value
-             ):
-                 lf_tag = LakeFormationTagPlatformResource(
-                     **platform_resource.resource_info.value.as_pydantic_object(
-                         LakeFormationTagPlatformResource
-                     ).dict()
-                 )
-                 if (
-                     lf_tag.id.platform_instance
-                     == lake_formation_tag_id.platform_instance
-                     and lf_tag.id.catalog == lake_formation_tag_id.catalog
-                 ):
-                     return lf_tag
+         """Create a default Lake Formation tag entity when none found in DataHub."""
+         # Type narrowing: we know this will be a LakeFormationTagPlatformResourceId
+         assert isinstance(entity_id, LakeFormationTagPlatformResourceId), (
+             f"Expected LakeFormationTagPlatformResourceId, got {type(entity_id)}"
+         )
+
+         # Create a new entity ID with correct default state instead of mutating
+         default_entity_id = LakeFormationTagPlatformResourceId(
+             tag_key=entity_id.tag_key,
+             tag_value=entity_id.tag_value,
+             platform_instance=entity_id.platform_instance,
+             catalog=entity_id.catalog,
+             exists_in_lake_formation=False, # New entities don't exist in Lake Formation yet
+             persisted=False, # New entities are not persisted yet
+         )
+
          return cls(
-             id=lake_formation_tag_id,
+             id=default_entity_id,
              datahub_urns=LinkedResourceSet(urns=[]),
              managed_by_datahub=managed_by_datahub,
              allowed_values=None,
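
Note on the refactor above: callers of the renamed get_or_create_from_tag no longer pass platform_instance or catalog directly; both are now resolved from the GluePlatformResourceRepository, with catalog_id left as an optional per-call override. A minimal sketch of the new call shape, assuming the classes live in the modules listed in the file list and that a repository instance has already been constructed elsewhere (its constructor is not part of this diff):

    from datahub.api.entities.external.lake_formation_external_entites import (
        LakeFormationTag,
    )
    from datahub.ingestion.source.aws.tag_entities import (
        LakeFormationTagPlatformResourceId,
    )

    def resolve_tag_id(tag: LakeFormationTag, repo) -> LakeFormationTagPlatformResourceId:
        # repo is assumed to be a GluePlatformResourceRepository; platform_instance
        # and catalog now come from the repository rather than from the caller.
        return LakeFormationTagPlatformResourceId.get_or_create_from_tag(
            tag=tag,
            platform_resource_repository=repo,
            exists_in_lake_formation=False,  # default shown in the diff
        )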

datahub/ingestion/source/common/subtypes.py

@@ -30,6 +30,7 @@ class DatasetSubTypes(StrEnum):
      NEO4J_NODE = "Neo4j Node"
      NEO4J_RELATIONSHIP = "Neo4j Relationship"
      SNOWFLAKE_STREAM = "Snowflake Stream"
+     DYNAMIC_TABLE = "Dynamic Table"
      API_ENDPOINT = "API Endpoint"
      SLACK_CHANNEL = "Slack Channel"
      PROJECTIONS = "Projections"

datahub/ingestion/source/hex/api.py

@@ -375,6 +375,7 @@ class HexApi:
              description=hex_item.description,
              created_at=hex_item.created_at,
              last_edited_at=hex_item.last_edited_at,
+             last_published_at=hex_item.last_published_at,
              status=status,
              categories=categories,
              collections=collections,
@@ -389,6 +390,7 @@ class HexApi:
              description=hex_item.description,
              created_at=hex_item.created_at,
              last_edited_at=hex_item.last_edited_at,
+             last_published_at=hex_item.last_published_at,
              status=status,
              categories=categories,
              collections=collections,

datahub/ingestion/source/hex/mapper.py

@@ -122,7 +122,7 @@ class Mapper:
              lastModified=self._change_audit_stamps(
                  created_at=project.created_at, last_edited_at=project.last_edited_at
              ),
-             externalUrl=f"{self._base_url}/{self._workspace_name}/hex/{project.id}",
+             externalUrl=self._get_project_or_component_external_url(project),
              customProperties=dict(id=project.id),
              datasetEdges=self._dataset_edges(project.upstream_datasets),
              # TODO: support schema field upstream, maybe InputFields?
@@ -173,7 +173,7 @@ class Mapper:
              lastModified=self._change_audit_stamps(
                  created_at=component.created_at, last_edited_at=component.last_edited_at
              ),
-             externalUrl=f"{self._base_url}/{self._workspace_name}/hex/{component.id}",
+             externalUrl=self._get_project_or_component_external_url(component),
              customProperties=dict(id=component.id),
          )
 
@@ -242,6 +242,20 @@ class Mapper:
          assert isinstance(dashboard_urn, DashboardUrn)
          return dashboard_urn
 
+     def _get_project_or_component_external_url(
+         self,
+         project_or_component: Union[Project, Component],
+     ) -> Optional[str]:
+         if project_or_component.last_published_at is None:
+             return (
+                 f"{self._base_url}/{self._workspace_name}/hex/{project_or_component.id}"
+             )
+         else:
+             # published Projects/Components have a different URL that everybody, not just editors, can access
+             return (
+                 f"{self._base_url}/{self._workspace_name}/app/{project_or_component.id}"
+             )
+
      def _change_audit_stamps(
          self, created_at: Optional[datetime], last_edited_at: Optional[datetime]
      ) -> ChangeAuditStampsClass:
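
The helper added above picks between two URL shapes: the editor-facing /hex/ URL for unpublished Projects and Components, and the publicly shareable /app/ URL once last_published_at is set. A tiny illustration with made-up base URL, workspace, and id (none of these values come from the diff):

    # Hypothetical values, mirroring the logic of _get_project_or_component_external_url.
    base_url = "https://app.hex.tech"
    workspace_name = "acme-analytics"
    project_id = "abc123"

    draft_url = f"{base_url}/{workspace_name}/hex/{project_id}"      # unpublished: editor URL
    published_url = f"{base_url}/{workspace_name}/app/{project_id}"  # published: accessible to non-editors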

datahub/ingestion/source/hex/model.py

@@ -46,6 +46,7 @@ class Project:
      title: str
      description: Optional[str]
      last_edited_at: Optional[datetime] = None
+     last_published_at: Optional[datetime] = None
      created_at: Optional[datetime] = None
      status: Optional[Status] = None
      categories: Optional[List[Category]] = None # TODO: emit category description!
@@ -67,6 +68,7 @@ class Component:
      title: str
      description: Optional[str]
      last_edited_at: Optional[datetime] = None
+     last_published_at: Optional[datetime] = None
      created_at: Optional[datetime] = None
      status: Optional[Status] = None
      categories: Optional[List[Category]] = None

datahub/ingestion/source/looker/looker_common.py

@@ -379,6 +379,14 @@ class ExploreUpstreamViewField:
                  : -(len(self.field.field_group_variant.lower()) + 1)
              ]
 
+         # Validate that field_name is not empty to prevent invalid schema field URNs
+         if not field_name or not field_name.strip():
+             logger.warning(
+                 f"Empty field name detected for field '{self.field.name}' in explore '{self.explore.name}'. "
+                 f"Skipping field to prevent invalid schema field URN generation."
+             )
+             return None
+
          assert view_name # for lint false positive
 
          project_include: ProjectInclude = ProjectInclude(
@@ -1351,7 +1359,25 @@ class LookerExplore:
          fine_grained_lineages = []
          if config.extract_column_level_lineage:
              for field in self.fields or []:
+                 # Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
+                 if not field.name or not field.name.strip():
+                     logger.warning(
+                         f"Skipping fine-grained lineage for field with empty name in explore '{self.name}'"
+                     )
+                     continue
+
                  for upstream_column_ref in field.upstream_fields:
+                     # Skip creating fine-grained lineage for empty column names to prevent invalid schema field URNs
+                     if (
+                         not upstream_column_ref.column
+                         or not upstream_column_ref.column.strip()
+                     ):
+                         logger.warning(
+                             f"Skipping some fine-grained lineage for field '{field.name}' in explore '{self.name}' "
+                             f"due to empty upstream column name in table '{upstream_column_ref.table}'"
+                         )
+                         continue
+
                      fine_grained_lineages.append(
                          FineGrainedLineageClass(
                              upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
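
Both Looker changes above guard against empty field or column names before emitting fine-grained lineage, because an empty field path produces a malformed schema field URN. A standalone sketch of the same guard (the dataset URN below is hypothetical; the URN shape is the standard DataHub schemaField form):

    from typing import Optional

    def make_schema_field_urn(dataset_urn: str, field_path: str) -> Optional[str]:
        # An empty or whitespace-only field path would yield an invalid URN such as
        # "urn:li:schemaField:(<dataset_urn>,)", so skip it instead of emitting lineage.
        if not field_path or not field_path.strip():
            return None
        return f"urn:li:schemaField:({dataset_urn},{field_path})"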

datahub/ingestion/source/snowflake/constants.py

@@ -55,6 +55,7 @@ class SnowflakeObjectDomain(StrEnum):
      ICEBERG_TABLE = "iceberg table"
      STREAM = "stream"
      PROCEDURE = "procedure"
+     DYNAMIC_TABLE = "dynamic table"
 
 
  GENERIC_PERMISSION_ERROR_KEY = "permission-error"

datahub/ingestion/source/snowflake/snowflake_query.py

@@ -8,7 +8,7 @@ from datahub.ingestion.source.snowflake.snowflake_config import (
  )
  from datahub.utilities.prefix_batch_builder import PrefixGroup
 
- SHOW_VIEWS_MAX_PAGE_SIZE = 10000
+ SHOW_COMMAND_MAX_PAGE_SIZE = 10000
  SHOW_STREAM_MAX_PAGE_SIZE = 10000
 
 
@@ -38,12 +38,23 @@ class SnowflakeQuery:
          SnowflakeObjectDomain.MATERIALIZED_VIEW.capitalize(),
          SnowflakeObjectDomain.ICEBERG_TABLE.capitalize(),
          SnowflakeObjectDomain.STREAM.capitalize(),
+         SnowflakeObjectDomain.DYNAMIC_TABLE.capitalize(),
      }
 
      ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER = "({})".format(
          ",".join(f"'{domain}'" for domain in ACCESS_HISTORY_TABLE_VIEW_DOMAINS)
      )
 
+     # Domains that can be downstream tables in lineage
+     DOWNSTREAM_TABLE_DOMAINS = {
+         SnowflakeObjectDomain.TABLE.capitalize(),
+         SnowflakeObjectDomain.DYNAMIC_TABLE.capitalize(),
+     }
+
+     DOWNSTREAM_TABLE_DOMAINS_FILTER = "({})".format(
+         ",".join(f"'{domain}'" for domain in DOWNSTREAM_TABLE_DOMAINS)
+     )
+
      @staticmethod
      def current_account() -> str:
          return "select CURRENT_ACCOUNT()"
@@ -235,7 +246,7 @@
      @staticmethod
      def show_views_for_database(
          db_name: str,
-         limit: int = SHOW_VIEWS_MAX_PAGE_SIZE,
+         limit: int = SHOW_COMMAND_MAX_PAGE_SIZE,
          view_pagination_marker: Optional[str] = None,
      ) -> str:
          # While there is an information_schema.views view, that only shows the view definition if the role
@@ -244,7 +255,7 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
 
          # SHOW VIEWS can return a maximum of 10000 rows.
          # https://docs.snowflake.com/en/sql-reference/sql/show-views#usage-notes
-         assert limit <= SHOW_VIEWS_MAX_PAGE_SIZE
+         assert limit <= SHOW_COMMAND_MAX_PAGE_SIZE
 
          # To work around this, we paginate through the results using the FROM clause.
          from_clause = (
@@ -686,7 +697,7 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
              AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
              AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)
              AND upstream_table_domain in {allowed_upstream_table_domains}
-             AND downstream_table_domain = '{SnowflakeObjectDomain.TABLE.capitalize()}'
+             AND downstream_table_domain in {SnowflakeQuery.DOWNSTREAM_TABLE_DOMAINS_FILTER}
              {("AND " + upstream_sql_filter) if upstream_sql_filter else ""}
          ),
          column_upstream_jobs AS (
@@ -843,7 +854,7 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
              AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
              AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)
              AND upstream_table_domain in {allowed_upstream_table_domains}
-             AND downstream_table_domain = '{SnowflakeObjectDomain.TABLE.capitalize()}'
+             AND downstream_table_domain in {SnowflakeQuery.DOWNSTREAM_TABLE_DOMAINS_FILTER}
              {("AND " + upstream_sql_filter) if upstream_sql_filter else ""}
          ),
          table_upstream_jobs_unique AS (
@@ -940,3 +951,37 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
              f"""FROM '{stream_pagination_marker}'""" if stream_pagination_marker else ""
          )
          return f"""SHOW STREAMS IN DATABASE "{db_name}" LIMIT {limit} {from_clause};"""
+
+     @staticmethod
+     def show_dynamic_tables_for_database(
+         db_name: str,
+         limit: int = SHOW_COMMAND_MAX_PAGE_SIZE,
+         dynamic_table_pagination_marker: Optional[str] = None,
+     ) -> str:
+         """Get dynamic table definitions using SHOW DYNAMIC TABLES."""
+         assert limit <= SHOW_COMMAND_MAX_PAGE_SIZE
+
+         from_clause = (
+             f"""FROM '{dynamic_table_pagination_marker}'"""
+             if dynamic_table_pagination_marker
+             else ""
+         )
+         return f"""\
+ SHOW DYNAMIC TABLES IN DATABASE "{db_name}"
+ LIMIT {limit} {from_clause};
+ """
+
+     @staticmethod
+     def get_dynamic_table_graph_history(db_name: str) -> str:
+         """Get dynamic table dependency information from information schema."""
+         return f"""
+ SELECT
+     name,
+     inputs,
+     target_lag_type,
+     target_lag_sec,
+     scheduling_state,
+     alter_trigger
+ FROM TABLE("{db_name}".INFORMATION_SCHEMA.DYNAMIC_TABLE_GRAPH_HISTORY())
+ ORDER BY name
+ """