acryl-datahub 1.1.0.5rc8__py3-none-any.whl → 1.1.0.5rc10__py3-none-any.whl

This diff compares the publicly released contents of two package versions as they appear in their respective registries. It is provided for informational purposes only.

This version of acryl-datahub has been flagged as potentially problematic.

Files changed (47)
  1. {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/METADATA +2465 -2465
  2. {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/RECORD +47 -47
  3. datahub/_version.py +1 -1
  4. datahub/cli/check_cli.py +45 -1
  5. datahub/cli/cli_utils.py +0 -10
  6. datahub/cli/container_cli.py +5 -0
  7. datahub/cli/delete_cli.py +5 -0
  8. datahub/cli/docker_cli.py +2 -0
  9. datahub/cli/exists_cli.py +2 -0
  10. datahub/cli/get_cli.py +2 -0
  11. datahub/cli/iceberg_cli.py +5 -0
  12. datahub/cli/ingest_cli.py +7 -0
  13. datahub/cli/migrate.py +2 -0
  14. datahub/cli/put_cli.py +3 -0
  15. datahub/cli/specific/assertions_cli.py +2 -0
  16. datahub/cli/specific/datacontract_cli.py +3 -0
  17. datahub/cli/specific/dataproduct_cli.py +11 -0
  18. datahub/cli/specific/dataset_cli.py +4 -0
  19. datahub/cli/specific/forms_cli.py +2 -0
  20. datahub/cli/specific/group_cli.py +2 -0
  21. datahub/cli/specific/structuredproperties_cli.py +4 -0
  22. datahub/cli/specific/user_cli.py +2 -0
  23. datahub/cli/state_cli.py +2 -0
  24. datahub/cli/timeline_cli.py +2 -0
  25. datahub/emitter/rest_emitter.py +24 -8
  26. datahub/ingestion/api/report.py +72 -12
  27. datahub/ingestion/autogenerated/capability_summary.json +19 -1
  28. datahub/ingestion/autogenerated/lineage_helper.py +101 -19
  29. datahub/ingestion/source/common/subtypes.py +2 -0
  30. datahub/ingestion/source/dremio/dremio_api.py +38 -27
  31. datahub/ingestion/source/mlflow.py +11 -1
  32. datahub/ingestion/source/snowflake/snowflake_queries.py +127 -0
  33. datahub/ingestion/source/sql/sql_common.py +4 -0
  34. datahub/ingestion/source/sql/teradata.py +993 -234
  35. datahub/ingestion/source/tableau/tableau.py +11 -2
  36. datahub/ingestion/source/tableau/tableau_constant.py +0 -2
  37. datahub/metadata/_internal_schema_classes.py +528 -529
  38. datahub/metadata/_urns/urn_defs.py +1803 -1803
  39. datahub/metadata/schema.avsc +16720 -17109
  40. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +1 -3
  41. datahub/sdk/main_client.py +14 -2
  42. datahub/sdk/search_client.py +4 -3
  43. datahub/telemetry/telemetry.py +17 -11
  44. {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/WHEEL +0 -0
  45. {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/entry_points.txt +0 -0
  46. {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/licenses/LICENSE +0 -0
  47. {acryl_datahub-1.1.0.5rc8.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/top_level.txt +0 -0
datahub/cli/specific/datacontract_cli.py CHANGED
@@ -7,6 +7,7 @@ from click_default_group import DefaultGroup
 from datahub.api.entities.datacontract.datacontract import DataContract
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -19,6 +20,7 @@ def datacontract() -> None:
 
 @datacontract.command()
 @click.option("-f", "--file", required=True, type=click.Path(exists=True))
+@upgrade.check_upgrade
 def upsert(file: str) -> None:
     """Upsert (create or update) a Data Contract in DataHub."""
 
@@ -55,6 +57,7 @@ def upsert(file: str) -> None:
     help="The file containing the data contract definition",
 )
 @click.option("--hard/--soft", required=False, is_flag=True, default=False)
+@upgrade.check_upgrade
 def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
     """Delete a Data Contract in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata."""
 
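The pattern repeated across all of the CLI diffs below is the same two-line change: import `upgrade` and apply `@upgrade.check_upgrade` to each command. Because the decorator sits below the `@click.option` decorators, it wraps the plain callback before click turns it into a `Command`. The decorator's implementation is not part of this diff; the following is only a rough sketch of the shape such a decorator typically has, with `_maybe_print_upgrade_hint` a hypothetical stand-in for the real version-check logic:

import functools
from typing import Any, Callable, TypeVar

T = TypeVar("T")

def _maybe_print_upgrade_hint() -> None:
    # Hypothetical helper: the real code would compare the installed client
    # version against the server / PyPI and log a hint if an upgrade exists.
    pass

def check_upgrade(func: Callable[..., T]) -> Callable[..., T]:
    """Sketch of a check_upgrade-style decorator (not the actual implementation)."""

    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> T:
        result = func(*args, **kwargs)  # run the CLI command itself
        _maybe_print_upgrade_hint()  # then surface any upgrade advice
        return result

    return wrapper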
datahub/cli/specific/dataproduct_cli.py CHANGED
@@ -23,6 +23,7 @@ from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import OwnerClass, OwnershipTypeClass
 from datahub.specific.dataproduct import DataProductPatchBuilder
+from datahub.upgrade import upgrade
 from datahub.utilities.urns.urn import Urn
 
 logger = logging.getLogger(__name__)
@@ -127,6 +128,7 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -> None:
     "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
 )
 @click.option("--external-url", required=False, type=str)
+@upgrade.check_upgrade
 def update(file: Path, validate_assets: bool, external_url: str) -> None:
     """Create or Update a Data Product in DataHub. Use upsert if you want to apply partial updates."""
 
@@ -141,6 +143,7 @@ def update(file: Path, validate_assets: bool, external_url: str) -> None:
     "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
 )
 @click.option("--external-url", required=False, type=str)
+@upgrade.check_upgrade
 def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
     """Upsert attributes to a Data Product in DataHub."""
 
@@ -152,6 +155,7 @@ def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
 )
 @click.option("-f", "--file", required=True, type=click.Path(exists=True))
 @click.option("--update", required=False, is_flag=True, default=False)
+@upgrade.check_upgrade
 def diff(file: Path, update: bool) -> None:
     """Diff a Data Product file with its twin in DataHub"""
 
@@ -197,6 +201,7 @@ def diff(file: Path, update: bool) -> None:
     help="The file containing the data product definition",
 )
 @click.option("--hard/--soft", required=False, is_flag=True, default=False)
+@upgrade.check_upgrade
 def delete(urn: str, file: Path, hard: bool) -> None:
     """Delete a Data Product in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata."""
 
@@ -231,6 +236,7 @@ def delete(urn: str, file: Path, hard: bool) -> None:
 )
 @click.option("--urn", required=True, type=str)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def get(urn: str, to_file: str) -> None:
     """Get a Data Product from DataHub"""
 
@@ -266,6 +272,7 @@ def get(urn: str, to_file: str) -> None:
     type=click.Path(exists=True),
     help="A markdown file that contains documentation for this data product",
 )
+@upgrade.check_upgrade
 def set_description(urn: str, description: str, md_file: Path) -> None:
     """Set description for a Data Product in DataHub"""
 
@@ -315,6 +322,7 @@ def set_description(urn: str, description: str, md_file: Path) -> None:
     ),
     default=OwnershipTypeClass.TECHNICAL_OWNER,
 )
+@upgrade.check_upgrade
 def add_owner(urn: str, owner: str, owner_type: str) -> None:
     """Add owner for a Data Product in DataHub"""
 
@@ -336,6 +344,7 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None:
 @dataproduct.command(name="remove_owner", help="Remove an owner from a Data Product")
 @click.option("--urn", required=True, type=str)
 @click.argument("owner_urn", required=True, type=str)
+@upgrade.check_upgrade
 def remove_owner(urn: str, owner_urn: str) -> None:
     """Remove owner for a Data Product in DataHub"""
 
@@ -356,6 +365,7 @@ def remove_owner(urn: str, owner_urn: str) -> None:
 @click.option(
     "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
 )
+@upgrade.check_upgrade
 def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
     """Add asset for a Data Product in DataHub"""
 
@@ -381,6 +391,7 @@ def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
 @click.option(
     "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
 )
+@upgrade.check_upgrade
 def remove_asset(urn: str, asset: str, validate_assets: bool) -> None:
     """Remove asset for a Data Product in DataHub"""
 
datahub/cli/specific/dataset_cli.py CHANGED
@@ -14,6 +14,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
+from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -43,6 +44,7 @@ def upsert(file: Path, dry_run: bool) -> None:
 )
 @click.option("--urn", required=True, type=str)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def get(urn: str, to_file: str) -> None:
     """Get a Dataset from DataHub"""
 
@@ -71,6 +73,7 @@ def get(urn: str, to_file: str) -> None:
     help="URN of secondary sibling(s)",
     multiple=True,
 )
+@upgrade.check_upgrade
 def add_sibling(urn: str, sibling_urns: Tuple[str]) -> None:
     all_urns = set()
     all_urns.add(urn)
@@ -165,6 +168,7 @@ def file(lintcheck: bool, lintfix: bool, file: str) -> None:
 @click.option(
     "-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
 )
+@upgrade.check_upgrade
 def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
     """Sync a Dataset file to/from DataHub"""
 
datahub/cli/specific/forms_cli.py CHANGED
@@ -8,6 +8,7 @@ from click_default_group import DefaultGroup
 from datahub.api.entities.forms.forms import Forms
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -33,6 +34,7 @@ def upsert(file: Path) -> None:
 )
 @click.option("--urn", required=True, type=str)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def get(urn: str, to_file: str) -> None:
     """Get form from DataHub"""
     with get_default_graph(ClientMode.CLI) as graph:
datahub/cli/specific/group_cli.py CHANGED
@@ -11,6 +11,7 @@ from datahub.api.entities.corpgroup.corpgroup import (
 from datahub.cli.specific.file_loader import load_file
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -32,6 +33,7 @@ def group() -> None:
     default=False,
     help="When set, writes to the editable section of the metadata graph, overwriting writes from the UI",
 )
+@upgrade.check_upgrade
 def upsert(file: Path, override_editable: bool) -> None:
     """Create or Update a Group with embedded Users"""
 
datahub/cli/specific/structuredproperties_cli.py CHANGED
@@ -12,6 +12,7 @@ from datahub.api.entities.structuredproperties.structuredproperties import (
 )
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade
 from datahub.utilities.urns.urn import Urn
 
 logger = logging.getLogger(__name__)
@@ -27,6 +28,7 @@ def properties() -> None:
     name="upsert",
 )
 @click.option("-f", "--file", required=True, type=click.Path(exists=True))
+@upgrade.check_upgrade
 def upsert(file: Path) -> None:
     """Upsert structured properties in DataHub."""
 
@@ -39,6 +41,7 @@ def upsert(file: Path) -> None:
 )
 @click.option("--urn", required=True, type=str)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def get(urn: str, to_file: str) -> None:
     """Get structured properties from DataHub"""
     urn = Urn.make_structured_property_urn(urn)
@@ -65,6 +68,7 @@ def get(urn: str, to_file: str) -> None:
 )
 @click.option("--details/--no-details", is_flag=True, default=True)
 @click.option("--to-file", required=False, type=str)
+@upgrade.check_upgrade
 def list(details: bool, to_file: str) -> None:
     """List structured properties in DataHub"""
 
datahub/cli/specific/user_cli.py CHANGED
@@ -9,6 +9,7 @@ from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
 from datahub.cli.specific.file_loader import load_file
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -30,6 +31,7 @@ def user() -> None:
     is_flag=True,
     help="Use this flag to overwrite the information that is set via the UI",
 )
+@upgrade.check_upgrade
 def upsert(file: Path, override_editable: bool) -> None:
     """Create or Update a User in DataHub"""
 
datahub/cli/state_cli.py CHANGED
@@ -6,6 +6,7 @@ from click_default_group import DefaultGroup
 
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
 
@@ -19,6 +20,7 @@ def state() -> None:
 @state.command()
 @click.option("--pipeline-name", required=True, type=str)
 @click.option("--platform", required=True, type=str)
+@upgrade.check_upgrade
 def inspect(pipeline_name: str, platform: str) -> None:
     """
     Get the latest stateful ingestion state for a given pipeline.
datahub/cli/timeline_cli.py CHANGED
@@ -10,6 +10,7 @@ from requests import Response
 from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.ingestion.graph.config import ClientMode
+from datahub.upgrade import upgrade
 from datahub.utilities.urns.urn import Urn
 
 logger = logging.getLogger(__name__)
@@ -127,6 +128,7 @@ def get_timeline(
 )
 @click.option("--raw", type=bool, is_flag=True, help="Show the raw diff")
 @click.pass_context
+@upgrade.check_upgrade
 def timeline(
     ctx: Any,
     urn: str,
datahub/emitter/rest_emitter.py CHANGED
@@ -61,6 +61,10 @@ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
     MetadataChangeProposal,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.usage import UsageAggregation
+from datahub.metadata.schema_classes import (
+    KEY_ASPECT_NAMES,
+    ChangeTypeClass,
+)
 from datahub.utilities.server_config_util import RestServiceConfig, ServiceFeature
 
 if TYPE_CHECKING:
@@ -626,15 +630,27 @@ class DataHubRestEmitter(Closeable, Emitter):
             trace_data = extract_trace_data(response) if response else None
 
         else:
-            url = f"{self._gms_server}/aspects?action=ingestProposal"
+            if mcp.changeType == ChangeTypeClass.DELETE:
+                if mcp.aspectName not in KEY_ASPECT_NAMES:
+                    raise OperationalError(
+                        f"Delete not supported for non key aspect: {mcp.aspectName} for urn: "
+                        f"{mcp.entityUrn}"
+                    )
 
-            mcp_obj = preserve_unicode_escapes(pre_json_transform(mcp.to_obj()))
-            payload_dict = {
-                "proposal": mcp_obj,
-                "async": "true"
-                if emit_mode in (EmitMode.ASYNC, EmitMode.ASYNC_WAIT)
-                else "false",
-            }
+                url = f"{self._gms_server}/entities?action=delete"
+                payload_dict = {
+                    "urn": mcp.entityUrn,
+                }
+            else:
+                url = f"{self._gms_server}/aspects?action=ingestProposal"
+
+                mcp_obj = preserve_unicode_escapes(pre_json_transform(mcp.to_obj()))
+                payload_dict = {
+                    "proposal": mcp_obj,
+                    "async": "true"
+                    if emit_mode in (EmitMode.ASYNC, EmitMode.ASYNC_WAIT)
+                    else "false",
+                }
 
         payload = json.dumps(payload_dict)
 
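The emitter change routes DELETE change proposals down the legacy REST path: deletes of key aspects go to `/entities?action=delete`, and deletes of any other aspect are rejected with an `OperationalError`. A minimal sketch of what a caller might now emit; the exact `MetadataChangeProposal` constructor arguments are an assumption and should be checked against the generated class:

# Sketch only: emitting a DELETE proposal for an entity's key aspect.
from datahub.emitter.rest_emitter import DataHubRestEmitter
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeProposal
from datahub.metadata.schema_classes import ChangeTypeClass

emitter = DataHubRestEmitter(gms_server="http://localhost:8080")

mcp = MetadataChangeProposal(
    entityType="dataset",
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)",
    changeType=ChangeTypeClass.DELETE,
    aspectName="datasetKey",  # must be a key aspect; anything else now raises OperationalError
)
emitter.emit_mcp(mcp)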
datahub/ingestion/api/report.py CHANGED
@@ -11,6 +11,7 @@ from typing import Any, Dict, List, Optional, Set, Union, cast, runtime_checkable
 import humanfriendly
 import pydantic
 from pydantic import BaseModel
+from tabulate import tabulate
 from typing_extensions import Literal, Protocol
 
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -95,7 +96,58 @@ class Report(SupportsAsObj):
         }
 
     def as_string(self) -> str:
-        return pprint.pformat(self.as_obj(), width=150, sort_dicts=False)
+        self_obj = self.as_obj()
+        _aspects_by_subtypes = self_obj.pop("aspects_by_subtypes", None)
+
+        # Format the main report data
+        result = pprint.pformat(self_obj, width=150, sort_dicts=False)
+
+        # Add aspects_by_subtypes table if it exists
+        if _aspects_by_subtypes:
+            result += "\n\nAspects by Subtypes:\n"
+            result += self._format_aspects_by_subtypes_table(_aspects_by_subtypes)
+
+        return result
+
+    def _format_aspects_by_subtypes_table(
+        self, aspects_by_subtypes: Dict[str, Dict[str, Dict[str, int]]]
+    ) -> str:
+        """Format aspects_by_subtypes data as a table with aspects as rows and entity/subtype as columns."""
+        if not aspects_by_subtypes:
+            return "No aspects by subtypes data available."
+
+        all_aspects: set[str] = {
+            aspect
+            for subtypes in aspects_by_subtypes.values()
+            for aspects in subtypes.values()
+            for aspect in aspects
+        }
+
+        aspect_rows = sorted(all_aspects)
+
+        entity_subtype_columns = []
+        for entity_type, subtypes in aspects_by_subtypes.items():
+            for subtype in subtypes:
+                entity_subtype_columns.append(f"{entity_type} ({subtype})")
+
+        entity_subtype_columns.sort()
+
+        headers = ["Aspect"] + entity_subtype_columns
+
+        table_data = [
+            [aspect]
+            + [
+                aspects.get(aspect, 0)
+                for subtypes in aspects_by_subtypes.values()
+                for aspects in subtypes.values()
+            ]
+            for aspect in aspect_rows
+        ]
+
+        if table_data:
+            return tabulate(table_data, headers=headers, tablefmt="grid")
+        else:
+            return "No aspects by subtypes data available."
 
     def as_json(self) -> str:
         return json.dumps(self.as_obj())
@@ -108,7 +160,7 @@ class SourceReportSubtypes:
     urn: str
     entity_type: str
     subType: str = field(default="unknown")
-    aspects: Set[str] = field(default_factory=set)
+    aspects: Dict[str, int] = field(default_factory=dict)
 
 
 class ReportAttribute(BaseModel):
@@ -156,7 +208,7 @@ class ExamplesReport(Report, Closeable):
                 "urn": lambda val: val.urn,
                 "entityType": lambda val: val.entity_type,
                 "subTypes": lambda val: val.subType,
-                "aspects": lambda val: json.dumps(sorted(list(val.aspects))),
+                "aspects": lambda val: json.dumps(val.aspects),
            },
        )
 
@@ -295,20 +347,26 @@
         if urn in self._file_based_dict:
             if sub_type != "unknown":
                 self._file_based_dict[urn].subType = sub_type
-            self._file_based_dict[urn].aspects.add(aspectName)
+            aspects_dict = self._file_based_dict[urn].aspects
+            if aspectName in aspects_dict:
+                aspects_dict[aspectName] += 1
+            else:
+                aspects_dict[aspectName] = 1
             if has_fine_grained_lineage:
-                self._file_based_dict[urn].aspects.add(
-                    self._fine_grained_lineage_special_case_name
-                )
+                if self._fine_grained_lineage_special_case_name in aspects_dict:
+                    aspects_dict[self._fine_grained_lineage_special_case_name] += 1
+                else:
+                    aspects_dict[self._fine_grained_lineage_special_case_name] = 1
             self._file_based_dict.mark_dirty(urn)
         else:
+            aspects_dict = {aspectName: 1}
+            if has_fine_grained_lineage:
+                aspects_dict[self._fine_grained_lineage_special_case_name] = 1
             self._file_based_dict[urn] = SourceReportSubtypes(
                 urn=urn,
                 entity_type=entityType,
                 subType=sub_type,
-                aspects={aspectName}
-                if not has_fine_grained_lineage
-                else {aspectName, self._fine_grained_lineage_special_case_name},
+                aspects=aspects_dict,
             )
 
     def _store_workunit_data(self, wu: MetadataWorkUnit) -> None:
@@ -348,8 +406,10 @@
             aspects_raw = row["aspects"] or "[]"
 
             aspects = json.loads(aspects_raw)
-            for aspect in aspects:
-                entity_subtype_aspect_counts[entity_type][sub_type][aspect] += count
+            for aspect, aspect_count in aspects.items():
+                entity_subtype_aspect_counts[entity_type][sub_type][aspect] += (
+                    aspect_count * count
+                )
 
         self.aspects.clear()
         self.aspects_by_subtypes.clear()
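Taken together, the report changes switch per-entity aspect tracking from a `Set[str]` to a `Dict[str, int]` of counts, multiply stored per-urn counts by the urn count when aggregating, and render the result as a grid table via `tabulate`. A self-contained illustration of the table shape, using toy data in the same `entity type -> subtype -> aspect -> count` nesting as `aspects_by_subtypes`:

from tabulate import tabulate

aspects_by_subtypes = {
    "dataset": {
        "Table": {"status": 10, "schemaMetadata": 10, "upstreamLineage": 4},
        "View": {"status": 3, "schemaMetadata": 3},
    },
}

# One column per entity/subtype pair, one row per aspect name.
headers = ["Aspect"] + [
    f"{entity} ({subtype})"
    for entity, subtypes in aspects_by_subtypes.items()
    for subtype in subtypes
]
all_aspects = sorted(
    {
        aspect
        for subtypes in aspects_by_subtypes.values()
        for counts in subtypes.values()
        for aspect in counts
    }
)
rows = [
    [aspect]
    + [
        counts.get(aspect, 0)
        for subtypes in aspects_by_subtypes.values()
        for counts in subtypes.values()
    ]
    for aspect in all_aspects
]
print(tabulate(rows, headers=headers, tablefmt="grid"))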
datahub/ingestion/autogenerated/capability_summary.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "generated_at": "2025-07-11T05:33:33.512319+00:00",
+  "generated_at": "2025-07-14T09:20:09.632850+00:00",
   "generated_by": "metadata-ingestion/scripts/capability_summary.py",
   "plugin_details": {
     "abs": {
@@ -1628,6 +1628,14 @@
     },
     "mlflow": {
       "capabilities": [
+        {
+          "capability": "CONTAINERS",
+          "description": "Extract ML experiments",
+          "subtype_modifier": [
+            "ML Experiment"
+          ],
+          "supported": true
+        },
         {
           "capability": "DESCRIPTIONS",
           "description": "Extract descriptions for MLflow Registered Models and Model Versions",
@@ -3024,6 +3032,16 @@
     },
     "tableau": {
       "capabilities": [
+        {
+          "capability": "CONTAINERS",
+          "description": "Enabled by default",
+          "subtype_modifier": [
+            "Project",
+            "Site",
+            "Workbook"
+          ],
+          "supported": true
+        },
         {
           "capability": "LINEAGE_FINE",
           "description": "Enabled by default, configure using `extract_column_level_lineage`",
datahub/ingestion/autogenerated/lineage_helper.py CHANGED
@@ -1,5 +1,6 @@
 import json
 import logging
+from dataclasses import dataclass
 from functools import lru_cache
 from pathlib import Path
 from typing import Dict, List, Optional
@@ -7,7 +8,85 @@ from typing import Dict, List, Optional
 logger = logging.getLogger(__name__)
 
 # Global cache for lineage data to avoid repeated file reads
-_lineage_data: Optional[Dict] = None
+_lineage_data: Optional["LineageData"] = None
+
+
+@dataclass
+class Field:
+    name: str
+    path: str
+    isLineage: bool
+    relationship: Optional[Dict]
+
+
+@dataclass
+class Aspect:
+    name: str
+    fields: List[Field]
+
+
+@dataclass
+class Entity:
+    name: str
+    aspects: Dict[str, Aspect]
+
+
+@dataclass
+class LineageData:
+    # entity name -> aspect
+    entities: Dict[str, Entity]
+    generated_by: str
+    generated_at: str
+
+
+def get_lineage_data() -> LineageData:
+    """
+    This is experimental internal API subject to breaking changes without prior notice.
+    """
+    global _lineage_data
+
+    if _lineage_data is not None:
+        return _lineage_data
+
+    raw_data = _load_lineage_data()
+    _entities = raw_data.get("entities", {})
+    for entity_name, entity_data in _entities.items():
+        entity = Entity(
+            name=entity_name,
+            aspects={},
+        )
+        for aspect_name, aspect_data in entity_data.items():
+            entity.aspects[aspect_name] = Aspect(
+                name=aspect_name,
+                fields=[
+                    Field(
+                        name=field["name"],
+                        path=field["path"],
+                        isLineage=field["isLineage"],
+                        relationship=field.get("relationship", None),
+                    )
+                    for field in aspect_data.get("fields", [])
+                ],
+            )
+        _entities[entity_name] = entity
+
+    _lineage_data = LineageData(
+        entities=_entities,
+        generated_by=raw_data.get("generated_by", ""),
+        generated_at=raw_data.get("generated_at", ""),
+    )
+    return _lineage_data
+
+
+def get_all_aspect_names() -> List[str]:
+    """
+    This is experimental internal API subject to breaking changes without prior notice.
+    """
+    entities = get_lineage_data().entities
+    if not entities:
+        return []
+    first_entity = next(iter(entities.values()))
+    return list(first_entity.aspects.keys())
 
 
 def _load_lineage_data() -> Dict:
@@ -22,11 +101,6 @@ def _load_lineage_data() -> Dict:
     Raises:
         json.JSONDecodeError: If lineage.json is malformed
     """
-    global _lineage_data
-
-    if _lineage_data is not None:
-        return _lineage_data
-
     # Get the path to lineage.json relative to this file
     current_file = Path(__file__)
     lineage_file = current_file.parent / "lineage.json"
@@ -36,32 +110,40 @@
             f"Lineage file not found: {lineage_file}. "
             "This may indicate a packaging issue. Lineage detection will be disabled."
         )
-        _lineage_data = {}
-        return _lineage_data
+        return {}
 
     try:
         with open(lineage_file, "r") as f:
-            _lineage_data = json.load(f)
-            return _lineage_data
+            return json.load(f)
     except json.JSONDecodeError as e:
         logger.error(
             f"Failed to parse lineage.json: {e}. Lineage detection will be disabled."
         )
-        _lineage_data = {}
-        return _lineage_data
+        return {}
 
 
 def _get_fields(entity_type: str, aspect_name: str) -> List[Dict]:
     """
     This is experimental internal API subject to breaking changes without prior notice.
     """
-    return (
-        _load_lineage_data()
-        .get("entities", {})
-        .get(entity_type, {})
-        .get(aspect_name, {})
-        .get("fields", [])
-    )
+    lineage_data = get_lineage_data()
+    entity = lineage_data.entities.get(entity_type)
+    if not entity:
+        return []
+
+    aspect = entity.aspects.get(aspect_name)
+    if not aspect:
+        return []
+
+    return [
+        {
+            "name": field.name,
+            "path": field.path,
+            "isLineage": field.isLineage,
+            "relationship": field.relationship,
+        }
+        for field in aspect.fields
+    ]
 
 
 def _get_lineage_fields(entity_type: str, aspect_name: str) -> List[Dict]:
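The helper now parses lineage.json once into typed dataclasses, and the module-level cache moves from _load_lineage_data (which becomes a pure loader) into get_lineage_data. Usage, based directly on the functions added above (both flagged as experimental internal API):

from datahub.ingestion.autogenerated.lineage_helper import (
    get_all_aspect_names,
    get_lineage_data,
)

data = get_lineage_data()  # parsed on first call, then served from the module cache
print(data.generated_by, data.generated_at)

# Walk the typed structure: entity -> aspect -> fields flagged as lineage.
for entity_name, entity in data.entities.items():
    for aspect_name, aspect in entity.aspects.items():
        lineage_paths = [f.path for f in aspect.fields if f.isLineage]
        if lineage_paths:
            print(entity_name, aspect_name, lineage_paths)

print(get_all_aspect_names())

Note that get_all_aspect_names, as written, reads the aspect list from the first entity only.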
datahub/ingestion/source/common/subtypes.py CHANGED
@@ -59,6 +59,8 @@ class BIContainerSubTypes(StrEnum):
     LOOKER_FOLDER = "Folder"
     LOOKML_PROJECT = "LookML Project"
     LOOKML_MODEL = "LookML Model"
+    TABLEAU_SITE = "Site"
+    TABLEAU_PROJECT = "Project"
     TABLEAU_WORKBOOK = "Workbook"
     POWERBI_DATASET = "Semantic Model"
     POWERBI_DATASET_TABLE = "Table"
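These two members back the new Tableau container subtypes advertised in capability_summary.json above. Because BIContainerSubTypes is a string enum, members compare and serialize as their plain string values; a quick check:

from datahub.ingestion.source.common.subtypes import BIContainerSubTypes

assert BIContainerSubTypes.TABLEAU_SITE == "Site"
assert BIContainerSubTypes.TABLEAU_PROJECT == "Project"
print([s.value for s in BIContainerSubTypes])  # now includes "Site" and "Project"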