acryl-datahub 1.1.0.5rc8__py3-none-any.whl → 1.1.0.5rc10__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/METADATA +2465 -2465
- {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/RECORD +47 -47
- datahub/_version.py +1 -1
- datahub/cli/check_cli.py +45 -1
- datahub/cli/cli_utils.py +0 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +5 -0
- datahub/cli/docker_cli.py +2 -0
- datahub/cli/exists_cli.py +2 -0
- datahub/cli/get_cli.py +2 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +7 -0
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +3 -0
- datahub/cli/specific/assertions_cli.py +2 -0
- datahub/cli/specific/datacontract_cli.py +3 -0
- datahub/cli/specific/dataproduct_cli.py +11 -0
- datahub/cli/specific/dataset_cli.py +4 -0
- datahub/cli/specific/forms_cli.py +2 -0
- datahub/cli/specific/group_cli.py +2 -0
- datahub/cli/specific/structuredproperties_cli.py +4 -0
- datahub/cli/specific/user_cli.py +2 -0
- datahub/cli/state_cli.py +2 -0
- datahub/cli/timeline_cli.py +2 -0
- datahub/emitter/rest_emitter.py +24 -8
- datahub/ingestion/api/report.py +72 -12
- datahub/ingestion/autogenerated/capability_summary.json +19 -1
- datahub/ingestion/autogenerated/lineage_helper.py +101 -19
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/dremio/dremio_api.py +38 -27
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/snowflake/snowflake_queries.py +127 -0
- datahub/ingestion/source/sql/sql_common.py +4 -0
- datahub/ingestion/source/sql/teradata.py +993 -234
- datahub/ingestion/source/tableau/tableau.py +11 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -2
- datahub/metadata/_internal_schema_classes.py +528 -529
- datahub/metadata/_urns/urn_defs.py +1803 -1803
- datahub/metadata/schema.avsc +16720 -17109
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +1 -3
- datahub/sdk/main_client.py +14 -2
- datahub/sdk/search_client.py +4 -3
- datahub/telemetry/telemetry.py +17 -11
- {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/top_level.txt +0 -0
datahub/cli/specific/datacontract_cli.py CHANGED

@@ -7,6 +7,7 @@ from click_default_group import DefaultGroup
 from datahub.api.entities.datacontract.datacontract import DataContract
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade

 logger = logging.getLogger(__name__)

@@ -19,6 +20,7 @@ def datacontract() -> None:

 @datacontract.command()
 @click.option("-f", "--file", required=True, type=click.Path(exists=True))
+@upgrade.check_upgrade
 def upsert(file: str) -> None:
     """Upsert (create or update) a Data Contract in DataHub."""

@@ -55,6 +57,7 @@ def upsert(file: str) -> None:
     help="The file containing the data contract definition",
 )
 @click.option("--hard/--soft", required=False, is_flag=True, default=False)
+@upgrade.check_upgrade
 def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
     """Delete a Data Contract in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata."""

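Most of the CLI changes in this release simply apply the existing @upgrade.check_upgrade decorator to more Click commands, so the client's upgrade check runs around each invocation. A minimal sketch of the pattern, assuming only what the diffs show; the hello command itself is illustrative and not part of the package:

import click

from datahub.upgrade import upgrade


@click.command()
@click.option("--name", required=True, type=str)
@upgrade.check_upgrade  # placed innermost, directly above the command function, as in the diffs
def hello(name: str) -> None:
    """Illustrative command; the upgrade check wraps its execution."""
    click.echo(f"Hello, {name}!")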
datahub/cli/specific/dataproduct_cli.py CHANGED

@@ -23,6 +23,7 @@ from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import OwnerClass, OwnershipTypeClass
 from datahub.specific.dataproduct import DataProductPatchBuilder
+from datahub.upgrade import upgrade
 from datahub.utilities.urns.urn import Urn

 logger = logging.getLogger(__name__)
@@ -127,6 +128,7 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -
     "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
 )
 @click.option("--external-url", required=False, type=str)
+@upgrade.check_upgrade
 def update(file: Path, validate_assets: bool, external_url: str) -> None:
     """Create or Update a Data Product in DataHub. Use upsert if you want to apply partial updates."""

@@ -141,6 +143,7 @@ def update(file: Path, validate_assets: bool, external_url: str) -> None:
     "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
 )
 @click.option("--external-url", required=False, type=str)
+@upgrade.check_upgrade
 def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
     """Upsert attributes to a Data Product in DataHub."""

@@ -152,6 +155,7 @@ def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
 )
 @click.option("-f", "--file", required=True, type=click.Path(exists=True))
 @click.option("--update", required=False, is_flag=True, default=False)
+@upgrade.check_upgrade
 def diff(file: Path, update: bool) -> None:
     """Diff a Data Product file with its twin in DataHub"""

@@ -197,6 +201,7 @@ def diff(file: Path, update: bool) -> None:
     help="The file containing the data product definition",
 )
 @click.option("--hard/--soft", required=False, is_flag=True, default=False)
+@upgrade.check_upgrade
 def delete(urn: str, file: Path, hard: bool) -> None:
     """Delete a Data Product in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata."""

@@ -231,6 +236,7 @@ def delete(urn: str, file: Path, hard: bool) -> None:
 )
 @click.option("--urn", required=True, type=str)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def get(urn: str, to_file: str) -> None:
     """Get a Data Product from DataHub"""

@@ -266,6 +272,7 @@ def get(urn: str, to_file: str) -> None:
     type=click.Path(exists=True),
     help="A markdown file that contains documentation for this data product",
 )
+@upgrade.check_upgrade
 def set_description(urn: str, description: str, md_file: Path) -> None:
     """Set description for a Data Product in DataHub"""

@@ -315,6 +322,7 @@ def set_description(urn: str, description: str, md_file: Path) -> None:
     ),
     default=OwnershipTypeClass.TECHNICAL_OWNER,
 )
+@upgrade.check_upgrade
 def add_owner(urn: str, owner: str, owner_type: str) -> None:
     """Add owner for a Data Product in DataHub"""

@@ -336,6 +344,7 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None:
 @dataproduct.command(name="remove_owner", help="Remove an owner from a Data Product")
 @click.option("--urn", required=True, type=str)
 @click.argument("owner_urn", required=True, type=str)
+@upgrade.check_upgrade
 def remove_owner(urn: str, owner_urn: str) -> None:
     """Remove owner for a Data Product in DataHub"""

@@ -356,6 +365,7 @@ def remove_owner(urn: str, owner_urn: str) -> None:
 @click.option(
     "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
 )
+@upgrade.check_upgrade
 def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
     """Add asset for a Data Product in DataHub"""

@@ -381,6 +391,7 @@ def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
 @click.option(
     "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
 )
+@upgrade.check_upgrade
 def remove_asset(urn: str, asset: str, validate_assets: bool) -> None:
     """Remove asset for a Data Product in DataHub"""

datahub/cli/specific/dataset_cli.py CHANGED

@@ -14,6 +14,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
+from datahub.upgrade import upgrade

 logger = logging.getLogger(__name__)

@@ -43,6 +44,7 @@ def upsert(file: Path, dry_run: bool) -> None:
 )
 @click.option("--urn", required=True, type=str)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def get(urn: str, to_file: str) -> None:
     """Get a Dataset from DataHub"""

@@ -71,6 +73,7 @@ def get(urn: str, to_file: str) -> None:
     help="URN of secondary sibling(s)",
     multiple=True,
 )
+@upgrade.check_upgrade
 def add_sibling(urn: str, sibling_urns: Tuple[str]) -> None:
     all_urns = set()
     all_urns.add(urn)
@@ -165,6 +168,7 @@ def file(lintcheck: bool, lintfix: bool, file: str) -> None:
 @click.option(
     "-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
 )
+@upgrade.check_upgrade
 def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
     """Sync a Dataset file to/from DataHub"""

datahub/cli/specific/forms_cli.py CHANGED

@@ -8,6 +8,7 @@ from click_default_group import DefaultGroup
 from datahub.api.entities.forms.forms import Forms
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade

 logger = logging.getLogger(__name__)

@@ -33,6 +34,7 @@ def upsert(file: Path) -> None:
 )
 @click.option("--urn", required=True, type=str)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def get(urn: str, to_file: str) -> None:
     """Get form from DataHub"""
     with get_default_graph(ClientMode.CLI) as graph:
datahub/cli/specific/group_cli.py CHANGED

@@ -11,6 +11,7 @@ from datahub.api.entities.corpgroup.corpgroup import (
 from datahub.cli.specific.file_loader import load_file
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade

 logger = logging.getLogger(__name__)

@@ -32,6 +33,7 @@ def group() -> None:
     default=False,
     help="When set, writes to the editable section of the metadata graph, overwriting writes from the UI",
 )
+@upgrade.check_upgrade
 def upsert(file: Path, override_editable: bool) -> None:
     """Create or Update a Group with embedded Users"""

datahub/cli/specific/structuredproperties_cli.py CHANGED

@@ -12,6 +12,7 @@ from datahub.api.entities.structuredproperties.structuredproperties import (
 )
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade
 from datahub.utilities.urns.urn import Urn

 logger = logging.getLogger(__name__)
@@ -27,6 +28,7 @@ def properties() -> None:
     name="upsert",
 )
 @click.option("-f", "--file", required=True, type=click.Path(exists=True))
+@upgrade.check_upgrade
 def upsert(file: Path) -> None:
     """Upsert structured properties in DataHub."""

@@ -39,6 +41,7 @@ def upsert(file: Path) -> None:
 )
 @click.option("--urn", required=True, type=str)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def get(urn: str, to_file: str) -> None:
     """Get structured properties from DataHub"""
     urn = Urn.make_structured_property_urn(urn)
@@ -65,6 +68,7 @@ def get(urn: str, to_file: str) -> None:
 )
 @click.option("--details/--no-details", is_flag=True, default=True)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def list(details: bool, to_file: str) -> None:
     """List structured properties in DataHub"""

datahub/cli/specific/user_cli.py CHANGED

@@ -9,6 +9,7 @@ from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationC
 from datahub.cli.specific.file_loader import load_file
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade

 logger = logging.getLogger(__name__)

@@ -30,6 +31,7 @@ def user() -> None:
     is_flag=True,
     help="Use this flag to overwrite the information that is set via the UI",
 )
+@upgrade.check_upgrade
 def upsert(file: Path, override_editable: bool) -> None:
     """Create or Update a User in DataHub"""

datahub/cli/state_cli.py CHANGED

@@ -6,6 +6,7 @@ from click_default_group import DefaultGroup

 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade

 logger = logging.getLogger(__name__)

@@ -19,6 +20,7 @@ def state() -> None:
 @state.command()
 @click.option("--pipeline-name", required=True, type=str)
 @click.option("--platform", required=True, type=str)
+@upgrade.check_upgrade
 def inspect(pipeline_name: str, platform: str) -> None:
     """
     Get the latest stateful ingestion state for a given pipeline.
datahub/cli/timeline_cli.py CHANGED

@@ -10,6 +10,7 @@ from requests import Response
 from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade
 from datahub.utilities.urns.urn import Urn

 logger = logging.getLogger(__name__)
@@ -127,6 +128,7 @@ def get_timeline(
 )
 @click.option("--raw", type=bool, is_flag=True, help="Show the raw diff")
 @click.pass_context
+@upgrade.check_upgrade
 def timeline(
     ctx: Any,
     urn: str,
datahub/emitter/rest_emitter.py CHANGED

@@ -61,6 +61,10 @@ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
     MetadataChangeProposal,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.usage import UsageAggregation
+from datahub.metadata.schema_classes import (
+    KEY_ASPECT_NAMES,
+    ChangeTypeClass,
+)
 from datahub.utilities.server_config_util import RestServiceConfig, ServiceFeature

 if TYPE_CHECKING:
@@ -626,15 +630,27 @@ class DataHubRestEmitter(Closeable, Emitter):
             trace_data = extract_trace_data(response) if response else None

         else:
-
+            if mcp.changeType == ChangeTypeClass.DELETE:
+                if mcp.aspectName not in KEY_ASPECT_NAMES:
+                    raise OperationalError(
+                        f"Delete not supported for non key aspect: {mcp.aspectName} for urn: "
+                        f"{mcp.entityUrn}"
+                    )

-
-
-
-
-
-
-
+                url = f"{self._gms_server}/entities?action=delete"
+                payload_dict = {
+                    "urn": mcp.entityUrn,
+                }
+            else:
+                url = f"{self._gms_server}/aspects?action=ingestProposal"
+
+                mcp_obj = preserve_unicode_escapes(pre_json_transform(mcp.to_obj()))
+                payload_dict = {
+                    "proposal": mcp_obj,
+                    "async": "true"
+                    if emit_mode in (EmitMode.ASYNC, EmitMode.ASYNC_WAIT)
+                    else "false",
+                }

         payload = json.dumps(payload_dict)

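With this change the REST emitter special-cases proposals whose changeType is DELETE: deletes are accepted only for key aspects and are routed to the /entities?action=delete endpoint, while everything else continues through /aspects?action=ingestProposal. A rough, unverified sketch of what that enables from the client side; the server address and dataset URN are placeholders, and the exact constructor arguments may differ in this release:

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DataHubRestEmitter
from datahub.metadata.schema_classes import ChangeTypeClass, DatasetKeyClass

emitter = DataHubRestEmitter(gms_server="http://localhost:8080")  # placeholder GMS address

# Deleting an entity via its key aspect is routed to /entities?action=delete.
delete_mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,example.table,PROD)",  # placeholder URN
    changeType=ChangeTypeClass.DELETE,
    aspect=DatasetKeyClass(
        platform="urn:li:dataPlatform:hive", name="example.table", origin="PROD"
    ),
)
emitter.emit_mcp(delete_mcp)

# A DELETE proposal for any non-key aspect now raises OperationalError before a request is sent.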
datahub/ingestion/api/report.py CHANGED

@@ -11,6 +11,7 @@ from typing import Any, Dict, List, Optional, Set, Union, cast, runtime_checkabl
 import humanfriendly
 import pydantic
 from pydantic import BaseModel
+from tabulate import tabulate
 from typing_extensions import Literal, Protocol

 from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -95,7 +96,58 @@ class Report(SupportsAsObj):
         }

     def as_string(self) -> str:
-
+        self_obj = self.as_obj()
+        _aspects_by_subtypes = self_obj.pop("aspects_by_subtypes", None)
+
+        # Format the main report data
+        result = pprint.pformat(self_obj, width=150, sort_dicts=False)
+
+        # Add aspects_by_subtypes table if it exists
+        if _aspects_by_subtypes:
+            result += "\n\nAspects by Subtypes:\n"
+            result += self._format_aspects_by_subtypes_table(_aspects_by_subtypes)
+
+        return result
+
+    def _format_aspects_by_subtypes_table(
+        self, aspects_by_subtypes: Dict[str, Dict[str, Dict[str, int]]]
+    ) -> str:
+        """Format aspects_by_subtypes data as a table with aspects as rows and entity/subtype as columns."""
+        if not aspects_by_subtypes:
+            return "No aspects by subtypes data available."
+
+        all_aspects: set[str] = {
+            aspect
+            for subtypes in aspects_by_subtypes.values()
+            for aspects in subtypes.values()
+            for aspect in aspects
+        }
+
+        aspect_rows = sorted(all_aspects)
+
+        entity_subtype_columns = []
+        for entity_type, subtypes in aspects_by_subtypes.items():
+            for subtype in subtypes:
+                entity_subtype_columns.append(f"{entity_type} ({subtype})")
+
+        entity_subtype_columns.sort()
+
+        headers = ["Aspect"] + entity_subtype_columns
+
+        table_data = [
+            [aspect]
+            + [
+                aspects.get(aspect, 0)
+                for subtypes in aspects_by_subtypes.values()
+                for aspects in subtypes.values()
+            ]
+            for aspect in aspect_rows
+        ]
+
+        if table_data:
+            return tabulate(table_data, headers=headers, tablefmt="grid")
+        else:
+            return "No aspects by subtypes data available."

     def as_json(self) -> str:
         return json.dumps(self.as_obj())
@@ -108,7 +160,7 @@ class SourceReportSubtypes:
     urn: str
     entity_type: str
     subType: str = field(default="unknown")
-    aspects:
+    aspects: Dict[str, int] = field(default_factory=dict)


 class ReportAttribute(BaseModel):
@@ -156,7 +208,7 @@ class ExamplesReport(Report, Closeable):
                 "urn": lambda val: val.urn,
                 "entityType": lambda val: val.entity_type,
                 "subTypes": lambda val: val.subType,
-                "aspects": lambda val: json.dumps(
+                "aspects": lambda val: json.dumps(val.aspects),
             },
         )

@@ -295,20 +347,26 @@ class ExamplesReport(Report, Closeable):
         if urn in self._file_based_dict:
             if sub_type != "unknown":
                 self._file_based_dict[urn].subType = sub_type
-            self._file_based_dict[urn].aspects
+            aspects_dict = self._file_based_dict[urn].aspects
+            if aspectName in aspects_dict:
+                aspects_dict[aspectName] += 1
+            else:
+                aspects_dict[aspectName] = 1
             if has_fine_grained_lineage:
-                self.
-                self._fine_grained_lineage_special_case_name
-
+                if self._fine_grained_lineage_special_case_name in aspects_dict:
+                    aspects_dict[self._fine_grained_lineage_special_case_name] += 1
+                else:
+                    aspects_dict[self._fine_grained_lineage_special_case_name] = 1
             self._file_based_dict.mark_dirty(urn)
         else:
+            aspects_dict = {aspectName: 1}
+            if has_fine_grained_lineage:
+                aspects_dict[self._fine_grained_lineage_special_case_name] = 1
             self._file_based_dict[urn] = SourceReportSubtypes(
                 urn=urn,
                 entity_type=entityType,
                 subType=sub_type,
-                aspects=
-                if not has_fine_grained_lineage
-                else {aspectName, self._fine_grained_lineage_special_case_name},
+                aspects=aspects_dict,
             )

     def _store_workunit_data(self, wu: MetadataWorkUnit) -> None:
@@ -348,8 +406,10 @@ class ExamplesReport(Report, Closeable):
             aspects_raw = row["aspects"] or "[]"

             aspects = json.loads(aspects_raw)
-            for aspect in aspects:
-                entity_subtype_aspect_counts[entity_type][sub_type][aspect] +=
+            for aspect, aspect_count in aspects.items():
+                entity_subtype_aspect_counts[entity_type][sub_type][aspect] += (
+                    aspect_count * count
+                )

         self.aspects.clear()
         self.aspects_by_subtypes.clear()
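The new _format_aspects_by_subtypes_table helper renders the report's nested {entity type: {subtype: {aspect: count}}} mapping as a grid via tabulate, which is now imported at module level. A small, self-contained illustration of the same layout with made-up counts (not the report class itself):

from tabulate import tabulate

# Nested mapping in the same shape the report collects: entity type -> subtype -> aspect -> count.
aspects_by_subtypes = {
    "dataset": {
        "Table": {"schemaMetadata": 12, "status": 12},
        "View": {"schemaMetadata": 3, "upstreamLineage": 3},
    },
}

# One column per (entity type, subtype) pair, one row per aspect name.
columns = [
    f"{entity_type} ({subtype})"
    for entity_type, subtypes in aspects_by_subtypes.items()
    for subtype in subtypes
]
aspect_rows = sorted(
    {
        aspect
        for subtypes in aspects_by_subtypes.values()
        for aspects in subtypes.values()
        for aspect in aspects
    }
)
table = [
    [aspect]
    + [
        aspects.get(aspect, 0)
        for subtypes in aspects_by_subtypes.values()
        for aspects in subtypes.values()
    ]
    for aspect in aspect_rows
]
print(tabulate(table, headers=["Aspect"] + columns, tablefmt="grid"))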
datahub/ingestion/autogenerated/capability_summary.json CHANGED

@@ -1,5 +1,5 @@
 {
-  "generated_at": "2025-07-
+  "generated_at": "2025-07-14T09:20:09.632850+00:00",
   "generated_by": "metadata-ingestion/scripts/capability_summary.py",
   "plugin_details": {
     "abs": {
@@ -1628,6 +1628,14 @@
     },
     "mlflow": {
       "capabilities": [
+        {
+          "capability": "CONTAINERS",
+          "description": "Extract ML experiments",
+          "subtype_modifier": [
+            "ML Experiment"
+          ],
+          "supported": true
+        },
         {
           "capability": "DESCRIPTIONS",
           "description": "Extract descriptions for MLflow Registered Models and Model Versions",
@@ -3024,6 +3032,16 @@
     },
     "tableau": {
       "capabilities": [
+        {
+          "capability": "CONTAINERS",
+          "description": "Enabled by default",
+          "subtype_modifier": [
+            "Project",
+            "Site",
+            "Workbook"
+          ],
+          "supported": true
+        },
         {
           "capability": "LINEAGE_FINE",
           "description": "Enabled by default, configure using `extract_column_level_lineage`",
datahub/ingestion/autogenerated/lineage_helper.py CHANGED

@@ -1,5 +1,6 @@
 import json
 import logging
+from dataclasses import dataclass
 from functools import lru_cache
 from pathlib import Path
 from typing import Dict, List, Optional
@@ -7,7 +8,85 @@ from typing import Dict, List, Optional
 logger = logging.getLogger(__name__)

 # Global cache for lineage data to avoid repeated file reads
-_lineage_data: Optional[
+_lineage_data: Optional["LineageData"] = None
+
+
+@dataclass
+class Field:
+    name: str
+    path: str
+    isLineage: bool
+    relationship: Optional[Dict]
+
+
+@dataclass
+class Aspect:
+    name: str
+    fields: List[Field]
+
+
+@dataclass
+class Entity:
+    name: str
+    aspects: Dict[str, Aspect]
+
+
+@dataclass
+class LineageData:
+    # entity name -> aspect
+    entities: Dict[str, Entity]
+    generated_by: str
+    generated_at: str
+
+
+def get_lineage_data() -> LineageData:
+    """
+    This is experimental internal API subject to breaking changes without prior notice.
+    """
+    global _lineage_data
+
+    if _lineage_data is not None:
+        return _lineage_data
+
+    raw_data = _load_lineage_data()
+    _entities = raw_data.get("entities", {})
+    for entity_name, entity_data in _entities.items():
+        entity = Entity(
+            name=entity_name,
+            aspects={},
+        )
+        for aspect_name, aspect_data in entity_data.items():
+            entity.aspects[aspect_name] = Aspect(
+                name=aspect_name,
+                fields=[
+                    Field(
+                        name=field["name"],
+                        path=field["path"],
+                        isLineage=field["isLineage"],
+                        relationship=field.get("relationship", None),
+                    )
+                    for field in aspect_data.get("fields", [])
+                ],
+            )
+        _entities[entity_name] = entity
+
+    _lineage_data = LineageData(
+        entities=_entities,
+        generated_by=raw_data.get("generated_by", ""),
+        generated_at=raw_data.get("generated_at", ""),
+    )
+    return _lineage_data
+
+
+def get_all_aspect_names() -> List[str]:
+    """
+    This is experimental internal API subject to breaking changes without prior notice.
+    """
+    entities = get_lineage_data().entities
+    if not entities:
+        return []
+    first_entity = next(iter(entities.values()))
+    return list(first_entity.aspects.keys())


 def _load_lineage_data() -> Dict:
@@ -22,11 +101,6 @@ def _load_lineage_data() -> Dict:
     Raises:
         json.JSONDecodeError: If lineage.json is malformed
     """
-    global _lineage_data
-
-    if _lineage_data is not None:
-        return _lineage_data
-
     # Get the path to lineage.json relative to this file
     current_file = Path(__file__)
     lineage_file = current_file.parent / "lineage.json"
@@ -36,32 +110,40 @@ def _load_lineage_data() -> Dict:
             f"Lineage file not found: {lineage_file}. "
             "This may indicate a packaging issue. Lineage detection will be disabled."
         )
-
-        return _lineage_data
+        return {}

     try:
         with open(lineage_file, "r") as f:
-
-            return _lineage_data
+            return json.load(f)
     except json.JSONDecodeError as e:
         logger.error(
             f"Failed to parse lineage.json: {e}. Lineage detection will be disabled."
         )
-
-        return _lineage_data
+        return {}


 def _get_fields(entity_type: str, aspect_name: str) -> List[Dict]:
     """
     This is experimental internal API subject to breaking changes without prior notice.
     """
-
-
-
-
-
-
-
+    lineage_data = get_lineage_data()
+    entity = lineage_data.entities.get(entity_type)
+    if not entity:
+        return []
+
+    aspect = entity.aspects.get(aspect_name)
+    if not aspect:
+        return []
+
+    return [
+        {
+            "name": field.name,
+            "path": field.path,
+            "isLineage": field.isLineage,
+            "relationship": field.relationship,
+        }
+        for field in aspect.fields
+    ]


 def _get_lineage_fields(entity_type: str, aspect_name: str) -> List[Dict]:
datahub/ingestion/source/common/subtypes.py CHANGED

@@ -59,6 +59,8 @@ class BIContainerSubTypes(StrEnum):
     LOOKER_FOLDER = "Folder"
     LOOKML_PROJECT = "LookML Project"
     LOOKML_MODEL = "LookML Model"
+    TABLEAU_SITE = "Site"
+    TABLEAU_PROJECT = "Project"
     TABLEAU_WORKBOOK = "Workbook"
     POWERBI_DATASET = "Semantic Model"
     POWERBI_DATASET_TABLE = "Table"