acryl-datahub 0.15.0.1rc1__py3-none-any.whl → 0.15.0.1rc3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -7,7 +7,9 @@ from datahub.emitter.mce_builder import (
7
7
  make_data_platform_urn,
8
8
  make_dataplatform_instance_urn,
9
9
  )
10
+ from datahub.emitter.mcp_builder import DatabaseKey
10
11
  from datahub.metadata.schema_classes import (
12
+ ContainerClass,
11
13
  DataFlowInfoClass,
12
14
  DataJobInfoClass,
13
15
  DataJobInputOutputClass,
@@ -210,6 +212,18 @@ class MSSQLDataJob:
210
212
  status=self.status,
211
213
  )
212
214
 
215
+ @property
216
+ def as_container_aspect(self) -> ContainerClass:
217
+ databaseKey = DatabaseKey(
218
+ platform=self.entity.flow.orchestrator,
219
+ instance=self.entity.flow.platform_instance
220
+ if self.entity.flow.platform_instance
221
+ else None,
222
+ env=self.entity.flow.env,
223
+ database=self.entity.flow.db,
224
+ )
225
+ return ContainerClass(container=databaseKey.as_urn())
226
+
213
227
  @property
214
228
  def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
215
229
  if self.entity.flow.platform_instance:
@@ -257,6 +271,18 @@ class MSSQLDataFlow:
257
271
  externalUrl=self.external_url,
258
272
  )
259
273
 
274
+ @property
275
+ def as_container_aspect(self) -> ContainerClass:
276
+ databaseKey = DatabaseKey(
277
+ platform=self.entity.orchestrator,
278
+ instance=self.entity.platform_instance
279
+ if self.entity.platform_instance
280
+ else None,
281
+ env=self.entity.env,
282
+ database=self.entity.db,
283
+ )
284
+ return ContainerClass(container=databaseKey.as_urn())
285
+
260
286
  @property
261
287
  def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
262
288
  if self.entity.platform_instance:
@@ -639,6 +639,11 @@ class SQLServerSource(SQLAlchemySource):
639
639
  aspect=data_job.as_datajob_info_aspect,
640
640
  ).as_workunit()
641
641
 
642
+ yield MetadataChangeProposalWrapper(
643
+ entityUrn=data_job.urn,
644
+ aspect=data_job.as_container_aspect,
645
+ ).as_workunit()
646
+
642
647
  data_platform_instance_aspect = data_job.as_maybe_platform_instance_aspect
643
648
  if data_platform_instance_aspect:
644
649
  yield MetadataChangeProposalWrapper(
@@ -662,6 +667,11 @@ class SQLServerSource(SQLAlchemySource):
662
667
  aspect=data_flow.as_dataflow_info_aspect,
663
668
  ).as_workunit()
664
669
 
670
+ yield MetadataChangeProposalWrapper(
671
+ entityUrn=data_flow.urn,
672
+ aspect=data_flow.as_container_aspect,
673
+ ).as_workunit()
674
+
665
675
  data_platform_instance_aspect = data_flow.as_maybe_platform_instance_aspect
666
676
  if data_platform_instance_aspect:
667
677
  yield MetadataChangeProposalWrapper(
@@ -49,6 +49,7 @@ from datahub.configuration.source_common import (
49
49
  DatasetSourceConfigMixin,
50
50
  )
51
51
  from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
52
+ from datahub.configuration.validate_field_removal import pydantic_removed_field
52
53
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
53
54
  from datahub.emitter.mcp_builder import (
54
55
  ContainerKey,
@@ -380,11 +381,6 @@ class TableauConfig(
380
381
  description="[advanced] Number of metadata objects (e.g. CustomSQLTable, PublishedDatasource, etc) to query at a time using the Tableau API.",
381
382
  )
382
383
 
383
- fetch_size: int = Field(
384
- default=250,
385
- description="Specifies the number of records to retrieve in each batch during a query execution.",
386
- )
387
-
388
384
  # We've found that even with a small workbook page size (e.g. 10), the Tableau API often
389
385
  # returns warnings like this:
390
386
  # {
@@ -499,6 +495,10 @@ class TableauConfig(
499
495
  "This can only be used with ingest_tags enabled as it will overwrite tags entered from the UI.",
500
496
  )
501
497
 
498
+ _fetch_size = pydantic_removed_field(
499
+ "fetch_size",
500
+ )
501
+
502
502
  # pre = True because we want to take some decision before pydantic initialize the configuration to default values
503
503
  @root_validator(pre=True)
504
504
  def projects_backward_compatibility(cls, values: Dict) -> Dict:
@@ -1147,7 +1147,7 @@ class TableauSiteSource:
1147
1147
  connection_type: str,
1148
1148
  query_filter: str,
1149
1149
  current_cursor: Optional[str],
1150
- fetch_size: int = 250,
1150
+ fetch_size: int,
1151
1151
  retry_on_auth_error: bool = True,
1152
1152
  retries_remaining: Optional[int] = None,
1153
1153
  ) -> Tuple[dict, Optional[str], int]:
@@ -1344,7 +1344,11 @@ class TableauSiteSource:
1344
1344
  connection_type=connection_type,
1345
1345
  query_filter=filter_,
1346
1346
  current_cursor=current_cursor,
1347
- fetch_size=self.config.fetch_size,
1347
+ # `filter_page` contains metadata object IDs (e.g., Project IDs, Field IDs, Sheet IDs, etc.).
1348
+ # The number of IDs is always less than or equal to page_size.
1349
+ # If the IDs are primary keys, the number of metadata objects to load matches the number of records to return.
1350
+ # In our case, mostly, the IDs are primary key, therefore, fetch_size is set equal to page_size.
1351
+ fetch_size=page_size,
1348
1352
  )
1349
1353
 
1350
1354
  yield from connection_objects.get(c.NODES) or []
@@ -26,6 +26,9 @@ from datahub.emitter.mcp_builder import (
26
26
  gen_containers,
27
27
  )
28
28
  from datahub.emitter.sql_parsing_builder import SqlParsingBuilder
29
+ from datahub.ingestion.api.auto_work_units.auto_ensure_aspect_size import (
30
+ EnsureAspectSizeProcessor,
31
+ )
29
32
  from datahub.ingestion.api.common import PipelineContext
30
33
  from datahub.ingestion.api.decorators import (
31
34
  SupportStatus,
@@ -260,6 +263,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
260
263
  StaleEntityRemovalHandler.create(
261
264
  self, self.config, self.ctx
262
265
  ).workunit_processor,
266
+ EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size,
263
267
  ]
264
268
 
265
269
  def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
@@ -14,6 +14,7 @@ LINEAGE_EXTRACTION = "Lineage Extraction"
14
14
  USAGE_EXTRACTION_INGESTION = "Usage Extraction Ingestion"
15
15
  USAGE_EXTRACTION_OPERATIONAL_STATS = "Usage Extraction Operational Stats"
16
16
  USAGE_EXTRACTION_USAGE_AGGREGATION = "Usage Extraction Usage Aggregation"
17
+ EXTERNAL_TABLE_DDL_LINEAGE = "External table DDL Lineage"
17
18
  QUERIES_EXTRACTION = "Queries Extraction"
18
19
  PROFILING = "Profiling"
19
20