acryl-datahub 0.15.0.1rc2__py3-none-any.whl → 0.15.0.1rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.1rc2.dist-info → acryl_datahub-0.15.0.1rc3.dist-info}/METADATA +2459 -2459
- {acryl_datahub-0.15.0.1rc2.dist-info → acryl_datahub-0.15.0.1rc3.dist-info}/RECORD +12 -12
- datahub/__init__.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +231 -27
- datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +16 -2
- datahub/ingestion/source/sql/hive.py +606 -8
- datahub/ingestion/source/sql/mssql/job_models.py +26 -0
- datahub/ingestion/source/sql/mssql/source.py +10 -0
- datahub/ingestion/source/tableau/tableau.py +11 -7
- {acryl_datahub-0.15.0.1rc2.dist-info → acryl_datahub-0.15.0.1rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc2.dist-info → acryl_datahub-0.15.0.1rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc2.dist-info → acryl_datahub-0.15.0.1rc3.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/mssql/job_models.py

@@ -7,7 +7,9 @@ from datahub.emitter.mce_builder import (
     make_data_platform_urn,
     make_dataplatform_instance_urn,
 )
+from datahub.emitter.mcp_builder import DatabaseKey
 from datahub.metadata.schema_classes import (
+    ContainerClass,
     DataFlowInfoClass,
     DataJobInfoClass,
     DataJobInputOutputClass,
@@ -210,6 +212,18 @@ class MSSQLDataJob:
             status=self.status,
         )
 
+    @property
+    def as_container_aspect(self) -> ContainerClass:
+        databaseKey = DatabaseKey(
+            platform=self.entity.flow.orchestrator,
+            instance=self.entity.flow.platform_instance
+            if self.entity.flow.platform_instance
+            else None,
+            env=self.entity.flow.env,
+            database=self.entity.flow.db,
+        )
+        return ContainerClass(container=databaseKey.as_urn())
+
     @property
     def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
         if self.entity.flow.platform_instance:
@@ -257,6 +271,18 @@ class MSSQLDataFlow:
             externalUrl=self.external_url,
         )
 
+    @property
+    def as_container_aspect(self) -> ContainerClass:
+        databaseKey = DatabaseKey(
+            platform=self.entity.orchestrator,
+            instance=self.entity.platform_instance
+            if self.entity.platform_instance
+            else None,
+            env=self.entity.env,
+            database=self.entity.db,
+        )
+        return ContainerClass(container=databaseKey.as_urn())
+
     @property
     def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
         if self.entity.platform_instance:
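Both new as_container_aspect properties follow the same pattern: build a DatabaseKey for the entity's database and wrap its URN in a ContainerClass aspect, which parents the job or flow under the corresponding database container in DataHub. A minimal sketch of how a DatabaseKey resolves to a container URN (the platform, env, and database values here are hypothetical):

```python
from datahub.emitter.mcp_builder import DatabaseKey
from datahub.metadata.schema_classes import ContainerClass

# Hypothetical key values, for illustration only.
database_key = DatabaseKey(
    platform="mssql",
    instance=None,  # the platform_instance, when one is configured
    env="PROD",
    database="AdventureWorks",
)

# as_urn() derives a stable GUID from the key fields, yielding a
# container URN of the form "urn:li:container:<guid>".
container_aspect = ContainerClass(container=database_key.as_urn())
print(container_aspect.container)
```

Because the GUID is a pure function of the key fields, these aspects should point at the same database container the SQL source already emits, provided the key fields match.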
datahub/ingestion/source/sql/mssql/source.py

@@ -639,6 +639,11 @@ class SQLServerSource(SQLAlchemySource):
                 aspect=data_job.as_datajob_info_aspect,
             ).as_workunit()
 
+            yield MetadataChangeProposalWrapper(
+                entityUrn=data_job.urn,
+                aspect=data_job.as_container_aspect,
+            ).as_workunit()
+
             data_platform_instance_aspect = data_job.as_maybe_platform_instance_aspect
             if data_platform_instance_aspect:
                 yield MetadataChangeProposalWrapper(
@@ -662,6 +667,11 @@ class SQLServerSource(SQLAlchemySource):
                 aspect=data_flow.as_dataflow_info_aspect,
             ).as_workunit()
 
+            yield MetadataChangeProposalWrapper(
+                entityUrn=data_flow.urn,
+                aspect=data_flow.as_container_aspect,
+            ).as_workunit()
+
             data_platform_instance_aspect = data_flow.as_maybe_platform_instance_aspect
             if data_platform_instance_aspect:
                 yield MetadataChangeProposalWrapper(
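On the source side, the new container aspects are emitted as ordinary MCP workunits, exactly like the existing info and platform-instance aspects. A hedged sketch of the emission pattern (the URNs below are made up for illustration):

```python
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import ContainerClass

# Hypothetical URNs, for illustration only.
job_urn = "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demo_flow,PROD),Task_1)"
container_urn = "urn:li:container:0e9e6bbcb1f2cd6d684574d24a3bc769"

# Wrapping the aspect in a MetadataChangeProposalWrapper and calling
# as_workunit() sends it through the standard ingestion pipeline.
workunit = MetadataChangeProposalWrapper(
    entityUrn=job_urn,
    aspect=ContainerClass(container=container_urn),
).as_workunit()
```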
datahub/ingestion/source/tableau/tableau.py

@@ -49,6 +49,7 @@ from datahub.configuration.source_common import (
     DatasetSourceConfigMixin,
 )
 from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
+from datahub.configuration.validate_field_removal import pydantic_removed_field
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import (
     ContainerKey,
@@ -380,11 +381,6 @@ class TableauConfig(
         description="[advanced] Number of metadata objects (e.g. CustomSQLTable, PublishedDatasource, etc) to query at a time using the Tableau API.",
     )
 
-    fetch_size: int = Field(
-        default=250,
-        description="Specifies the number of records to retrieve in each batch during a query execution.",
-    )
-
     # We've found that even with a small workbook page size (e.g. 10), the Tableau API often
     # returns warnings like this:
     # {
@@ -499,6 +495,10 @@ class TableauConfig(
         "This can only be used with ingest_tags enabled as it will overwrite tags entered from the UI.",
     )
 
+    _fetch_size = pydantic_removed_field(
+        "fetch_size",
+    )
+
     # pre = True because we want to take some decision before pydantic initialize the configuration to default values
     @root_validator(pre=True)
     def projects_backward_compatibility(cls, values: Dict) -> Dict:
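pydantic_removed_field is DataHub's standard helper for retiring a config option without breaking existing recipes: a pre-validator pops the named key from the incoming values (emitting a warning), so recipes that still set fetch_size keep parsing instead of failing on an unrecognized field. A small sketch of that behavior, using a hypothetical config class:

```python
from datahub.configuration.common import ConfigModel
from datahub.configuration.validate_field_removal import pydantic_removed_field

class DemoConfig(ConfigModel):  # hypothetical, for illustration only
    page_size: int = 10
    _fetch_size = pydantic_removed_field("fetch_size")

# "fetch_size" is dropped (with a warning) before validation runs,
# rather than raising an error for an unknown field.
config = DemoConfig.parse_obj({"page_size": 5, "fetch_size": 250})
print(config.page_size)  # 5
```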
@@ -1147,7 +1147,7 @@ class TableauSiteSource:
         connection_type: str,
         query_filter: str,
         current_cursor: Optional[str],
-        fetch_size: int
+        fetch_size: int,
         retry_on_auth_error: bool = True,
         retries_remaining: Optional[int] = None,
     ) -> Tuple[dict, Optional[str], int]:
@@ -1344,7 +1344,11 @@ class TableauSiteSource:
             connection_type=connection_type,
             query_filter=filter_,
             current_cursor=current_cursor,
-
+            # `filter_page` contains metadata object IDs (e.g., Project IDs, Field IDs, Sheet IDs, etc.).
+            # The number of IDs is always less than or equal to page_size.
+            # If the IDs are primary keys, the number of metadata objects to load matches the number of records to return.
+            # In our case, the IDs are mostly primary keys, so fetch_size is set equal to page_size.
+            fetch_size=page_size,
         )
 
         yield from connection_objects.get(c.NODES) or []
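The new inline comments carry the reasoning for the hard-coded value: each filter page holds at most page_size object IDs, and when those IDs are primary keys, one ID maps to exactly one record, so a fetch_size equal to page_size retrieves the whole page in a single batch. A toy illustration of that invariant (the helper names are hypothetical, not the Tableau source's internals):

```python
from typing import Iterable, List

def chunked(ids: List[str], page_size: int) -> Iterable[List[str]]:
    """Split object IDs into filter pages of at most page_size IDs each."""
    for start in range(0, len(ids), page_size):
        yield ids[start : start + page_size]

page_size = 10
ids = [f"id-{i}" for i in range(23)]
for filter_page in chunked(ids, page_size):
    fetch_size = page_size  # mirrors the change in this diff
    # One primary-key ID maps to one record, so a batch of fetch_size
    # records always covers the whole filter page.
    assert len(filter_page) <= fetch_size
```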