acryl-datahub 0.15.0.1rc6__py3-none-any.whl → 0.15.0.1rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/METADATA +2390 -2390
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/RECORD +28 -27
- datahub/__init__.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +9 -0
- datahub/ingestion/source/looker/looker_source.py +19 -3
- datahub/ingestion/source/looker/looker_usage.py +23 -17
- datahub/ingestion/source/mode.py +40 -27
- datahub/ingestion/source/snowflake/snowflake_config.py +3 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -10
- datahub/ingestion/source/snowflake/snowflake_query.py +0 -9
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +1 -5
- datahub/ingestion/source/snowflake/snowflake_shares.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_v2.py +14 -6
- datahub/ingestion/source/tableau/tableau.py +51 -20
- datahub/ingestion/source_report/ingestion_stage.py +1 -0
- datahub/metadata/_schema_classes.py +195 -2
- datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py +2 -0
- datahub/metadata/schema.avsc +188 -4
- datahub/metadata/schemas/DataProcessInstanceKey.avsc +5 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +82 -0
- datahub/metadata/schemas/MLModelProperties.avsc +62 -2
- datahub/metadata/schemas/MLTrainingRunProperties.avsc +171 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +94 -2
- datahub/sql_parsing/tool_meta_extractor.py +4 -1
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py

@@ -8,7 +8,6 @@ from pydantic import BaseModel, Field, validator
 
 from datahub.configuration.datetimes import parse_absolute_time
 from datahub.ingestion.api.closeable import Closeable
-from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage
 from datahub.ingestion.source.snowflake.constants import (
     LINEAGE_PERMISSION_ERROR,

@@ -163,11 +162,11 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
             self.config.end_time,
         )
 
-    def
+    def add_time_based_lineage_to_aggregator(
         self,
        discovered_tables: List[str],
        discovered_views: List[str],
-    ) ->
+    ) -> None:
         if not self._should_ingest_lineage():
             return
 

@@ -177,9 +176,7 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
         # snowflake view/table -> snowflake table
         self.populate_table_upstreams(discovered_tables)
 
-
-            yield mcp.as_workunit()
-
+    def update_state(self):
         if self.redundant_run_skip_handler:
             # Update the checkpoint state for this run.
             self.redundant_run_skip_handler.update_state(

@@ -337,10 +334,6 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
             start_time_millis=int(self.start_time.timestamp() * 1000),
             end_time_millis=int(self.end_time.timestamp() * 1000),
             upstreams_deny_pattern=self.config.temporary_tables_pattern,
-            # The self.config.include_view_lineage setting is about fetching upstreams of views.
-            # We always generate lineage pointing at views from tables, even if self.config.include_view_lineage is False.
-            # TODO: Remove this `include_view_lineage` flag, since it's effectively dead code.
-            include_view_lineage=True,
             include_column_lineage=self.config.include_column_lineage,
         )
         try:
datahub/ingestion/source/snowflake/snowflake_query.py

@@ -376,7 +376,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
     def table_to_table_lineage_history_v2(
         start_time_millis: int,
         end_time_millis: int,
-        include_view_lineage: bool = True,
         include_column_lineage: bool = True,
         upstreams_deny_pattern: List[str] = DEFAULT_TEMP_TABLES_PATTERNS,
     ) -> str:

@@ -385,14 +384,12 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
                 start_time_millis,
                 end_time_millis,
                 upstreams_deny_pattern,
-                include_view_lineage,
             )
         else:
             return SnowflakeQuery.table_upstreams_only(
                 start_time_millis,
                 end_time_millis,
                 upstreams_deny_pattern,
-                include_view_lineage,
             )
 
     @staticmethod

@@ -677,12 +674,9 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
         start_time_millis: int,
         end_time_millis: int,
         upstreams_deny_pattern: List[str],
-        include_view_lineage: bool = True,
     ) -> str:
         allowed_upstream_table_domains = (
             SnowflakeQuery.ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER
-            if include_view_lineage
-            else SnowflakeQuery.ACCESS_HISTORY_TABLE_DOMAINS_FILTER
         )
 
         upstream_sql_filter = create_deny_regex_sql_filter(

@@ -847,12 +841,9 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
         start_time_millis: int,
         end_time_millis: int,
         upstreams_deny_pattern: List[str],
-        include_view_lineage: bool = True,
     ) -> str:
         allowed_upstream_table_domains = (
             SnowflakeQuery.ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER
-            if include_view_lineage
-            else SnowflakeQuery.ACCESS_HISTORY_TABLE_DOMAINS_FILTER
         )
 
         upstream_sql_filter = create_deny_regex_sql_filter(
datahub/ingestion/source/snowflake/snowflake_schema_gen.py

@@ -435,11 +435,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         )
 
         if self.config.include_views:
-            if (
-                self.aggregator
-                and self.config.include_view_lineage
-                and self.config.parse_view_ddl
-            ):
+            if self.aggregator:
                 for view in views:
                     view_identifier = self.identifiers.get_dataset_identifier(
                         view.name, schema_name, db_name
datahub/ingestion/source/snowflake/snowflake_shares.py

@@ -72,7 +72,7 @@ class SnowflakeSharesHandler(SnowflakeCommonMixin):
             assert len(sibling_dbs) == 1
             # SnowflakeLineageExtractor is unaware of database->schema->table hierarchy
             # hence this lineage code is not written in SnowflakeLineageExtractor
-            # also this is not governed by configs include_table_lineage
+            # also this is not governed by configs include_table_lineage
             yield self.get_upstream_lineage_with_primary_sibling(
                 db.name, schema.name, table_name, sibling_dbs[0]
             )
datahub/ingestion/source/snowflake/snowflake_v2.py

@@ -82,6 +82,7 @@ from datahub.ingestion.source_report.ingestion_stage import (
     LINEAGE_EXTRACTION,
     METADATA_EXTRACTION,
     QUERIES_EXTRACTION,
+    VIEW_PARSING,
 )
 from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator
 from datahub.utilities.registries.domain_registry import DomainRegistry

@@ -103,7 +104,7 @@ logger: logging.Logger = logging.getLogger(__name__)
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
 @capability(
     SourceCapability.LINEAGE_COARSE,
-    "Enabled by default, can be disabled via configuration `include_table_lineage`
+    "Enabled by default, can be disabled via configuration `include_table_lineage`",
 )
 @capability(
     SourceCapability.LINEAGE_FINE,

@@ -512,15 +513,14 @@ class SnowflakeV2Source(
         discovered_datasets = discovered_tables + discovered_views
 
         if self.config.use_queries_v2:
-            self.report.set_ingestion_stage("*",
-            assert self.aggregator is not None
+            self.report.set_ingestion_stage("*", VIEW_PARSING)
             yield from auto_workunit(self.aggregator.gen_metadata())
 
             self.report.set_ingestion_stage("*", QUERIES_EXTRACTION)
 
             schema_resolver = self.aggregator._schema_resolver
 
-            queries_extractor
+            queries_extractor = SnowflakeQueriesExtractor(
                 connection=self.connection,
                 config=SnowflakeQueriesExtractorConfig(
                     window=self.config,

@@ -546,13 +546,21 @@ class SnowflakeV2Source(
             queries_extractor.close()
 
         else:
-            if self.
+            if self.lineage_extractor:
                 self.report.set_ingestion_stage("*", LINEAGE_EXTRACTION)
-
+                self.lineage_extractor.add_time_based_lineage_to_aggregator(
                     discovered_tables=discovered_tables,
                     discovered_views=discovered_views,
                 )
 
+            # This would emit view and external table ddl lineage
+            # as well as query lineage via lineage_extractor
+            for mcp in self.aggregator.gen_metadata():
+                yield mcp.as_workunit()
+
+            if self.lineage_extractor:
+                self.lineage_extractor.update_state()
+
         if (
             self.config.include_usage_stats or self.config.include_operational_stats
         ) and self.usage_extractor:
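Taken together, the snowflake_lineage_v2.py and snowflake_v2.py hunks above move lineage emission out of the extractor and into the source: the extractor only feeds time-window lineage into the shared SqlParsingAggregator, and the source itself turns aggregator output into work units. A minimal sketch of that flow, condensed into a free function; emit_time_based_lineage is a hypothetical helper name, while the methods it calls (add_time_based_lineage_to_aggregator, gen_metadata, as_workunit, update_state) all appear in the hunks above.

from typing import Iterable, List

def emit_time_based_lineage(
    lineage_extractor,            # SnowflakeLineageExtractor or None
    aggregator,                   # shared SqlParsingAggregator
    discovered_tables: List[str],
    discovered_views: List[str],
) -> Iterable:
    if lineage_extractor:
        # Feed time-window lineage into the aggregator instead of yielding
        # work units directly from the extractor (the path removed above).
        lineage_extractor.add_time_based_lineage_to_aggregator(
            discovered_tables=discovered_tables,
            discovered_views=discovered_views,
        )
    # The aggregator emits view/external-table DDL lineage as well as query lineage.
    for mcp in aggregator.gen_metadata():
        yield mcp.as_workunit()
    # Checkpoint state for redundant-run skipping is updated after emission.
    if lineage_extractor:
        lineage_extractor.update_state()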
datahub/ingestion/source/tableau/tableau.py

@@ -35,7 +35,10 @@ from tableauserverclient import (
     SiteItem,
     TableauAuth,
 )
-from tableauserverclient.server.endpoint.exceptions import
+from tableauserverclient.server.endpoint.exceptions import (
+    InternalServerError,
+    NonXMLResponseError,
+)
 from urllib3 import Retry
 
 import datahub.emitter.mce_builder as builder

@@ -618,6 +621,12 @@ class DatabaseTable:
         self.parsed_columns = parsed_columns
 
 
+@dataclass
+class SiteIdContentUrl:
+    site_id: str
+    site_content_url: str
+
+
 class TableauSourceReport(StaleEntityRemovalSourceReport):
     get_all_datasources_query_failed: bool = False
     num_get_datasource_query_failures: int = 0

@@ -770,7 +779,6 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
                 config=self.config,
                 ctx=self.ctx,
                 site=site,
-                site_id=site.id,
                 report=self.report,
                 server=self.server,
                 platform=self.platform,

@@ -789,8 +797,11 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
         site_source = TableauSiteSource(
             config=self.config,
             ctx=self.ctx,
-            site=site
-
+            site=site
+            if site
+            else SiteIdContentUrl(
+                site_id=self.server.site_id, site_content_url=self.config.site
+            ),
             report=self.report,
             server=self.server,
             platform=self.platform,

@@ -823,8 +834,7 @@ class TableauSiteSource:
         self,
         config: TableauConfig,
         ctx: PipelineContext,
-        site:
-        site_id: Optional[str],
+        site: Union[SiteItem, SiteIdContentUrl],
         report: TableauSourceReport,
         server: Server,
         platform: str,

@@ -835,13 +845,18 @@ class TableauSiteSource:
         self.ctx: PipelineContext = ctx
         self.platform = platform
 
-        self.site: Optional[SiteItem] =
-        if
-        self.
+        self.site: Optional[SiteItem] = None
+        if isinstance(site, SiteItem):
+            self.site = site
+            assert site.id is not None, "Site ID is required"
+            self.site_id = site.id
+            self.site_content_url = site.content_url
+        elif isinstance(site, SiteIdContentUrl):
+            self.site = None
+            self.site_id = site.site_id
+            self.site_content_url = site.site_content_url
         else:
-
-            assert self.site.id is not None, "site_id is required when site is provided"
-            self.site_id = self.site.id
+            raise AssertionError("site or site id+content_url pair is required")
 
         self.database_tables: Dict[str, DatabaseTable] = {}
         self.tableau_stat_registry: Dict[str, UsageStat] = {}

@@ -895,16 +910,14 @@ class TableauSiteSource:
         # datasets also have the env in the browse path
         return f"/{self.config.env.lower()}{self.no_env_browse_prefix}"
 
-    def _re_authenticate(self):
+    def _re_authenticate(self) -> None:
+        self.report.info(
+            message="Re-authenticating to Tableau",
+            context=f"site='{self.site_content_url}'",
+        )
         # Sign-in again may not be enough because Tableau sometimes caches invalid sessions
         # so we need to recreate the Tableau Server object
-        self.server = self.config.make_tableau_client(self.
-
-    @property
-    def site_content_url(self) -> Optional[str]:
-        if self.site and self.site.content_url:
-            return self.site.content_url
-        return None
+        self.server = self.config.make_tableau_client(self.site_content_url)
 
     def _populate_usage_stat_registry(self) -> None:
         if self.server is None:

@@ -1196,6 +1209,24 @@ class TableauSiteSource:
                 retry_on_auth_error=False,
                 retries_remaining=retries_remaining - 1,
             )
+
+        except InternalServerError as ise:
+            # In some cases Tableau Server returns 504 error, which is a timeout error, so it worths to retry.
+            if ise.code == 504:
+                if retries_remaining <= 0:
+                    raise ise
+                return self.get_connection_object_page(
+                    query=query,
+                    connection_type=connection_type,
+                    query_filter=query_filter,
+                    fetch_size=fetch_size,
+                    current_cursor=current_cursor,
+                    retry_on_auth_error=False,
+                    retries_remaining=retries_remaining - 1,
+                )
+            else:
+                raise ise
+
         except OSError:
             # In tableauseverclient 0.26 (which was yanked and released in 0.28 on 2023-10-04),
             # the request logic was changed to use threads.
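The tableau.py hunks above replace the separate site_id parameter with a single site argument that is either a tableauserverclient SiteItem or the new SiteIdContentUrl dataclass. A hedged sketch of the two ways a TableauSiteSource can now be scoped to a site; build_site_source is a hypothetical helper, the platform string is illustrative, and config/ctx/report/server are assumed to be the usual Tableau source objects built elsewhere.

from typing import Optional

from tableauserverclient import SiteItem

from datahub.ingestion.source.tableau.tableau import SiteIdContentUrl, TableauSiteSource

def build_site_source(config, ctx, report, server, site: Optional[SiteItem]):
    # Pass a concrete SiteItem when one was fetched; otherwise fall back to the
    # site id + content URL pair, mirroring the caller change in this diff.
    site_arg = (
        site
        if site
        else SiteIdContentUrl(site_id=server.site_id, site_content_url=config.site)
    )
    return TableauSiteSource(
        config=config,
        ctx=ctx,
        site=site_arg,
        report=report,
        server=server,
        platform="tableau",  # illustrative; the real caller passes self.platform
    )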
datahub/ingestion/source_report/ingestion_stage.py

@@ -15,6 +15,7 @@ USAGE_EXTRACTION_INGESTION = "Usage Extraction Ingestion"
 USAGE_EXTRACTION_OPERATIONAL_STATS = "Usage Extraction Operational Stats"
 USAGE_EXTRACTION_USAGE_AGGREGATION = "Usage Extraction Usage Aggregation"
 EXTERNAL_TABLE_DDL_LINEAGE = "External table DDL Lineage"
+VIEW_PARSING = "View Parsing"
 QUERIES_EXTRACTION = "Queries Extraction"
 PROFILING = "Profiling"
 
datahub/metadata/_schema_classes.py

@@ -14737,7 +14737,7 @@ class DataProcessInstanceKeyClass(_Aspect):
 
 
     ASPECT_NAME = 'dataProcessInstanceKey'
-    ASPECT_INFO = {'keyForEntity': 'dataProcessInstance', 'entityCategory': '_unset_', 'entityAspects': ['dataProcessInstanceInput', 'dataProcessInstanceOutput', 'dataProcessInstanceProperties', 'dataProcessInstanceRelationships', 'dataProcessInstanceRunEvent', 'status', 'testResults'], 'entityDoc': 'DataProcessInstance represents an instance of a datajob/jobflow run'}
+    ASPECT_INFO = {'keyForEntity': 'dataProcessInstance', 'entityCategory': '_unset_', 'entityAspects': ['dataProcessInstanceInput', 'dataProcessInstanceOutput', 'dataProcessInstanceProperties', 'dataProcessInstanceRelationships', 'dataProcessInstanceRunEvent', 'status', 'testResults', 'dataPlatformInstance', 'subTypes', 'container', 'mlTrainingRunProperties'], 'entityDoc': 'DataProcessInstance represents an instance of a datajob/jobflow run'}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataProcessInstanceKey")
 
     def __init__(self,

@@ -17715,8 +17715,12 @@ class MLModelGroupPropertiesClass(_Aspect):
 
     def __init__(self,
         customProperties: Optional[Dict[str, str]]=None,
+        name: Union[None, str]=None,
         description: Union[None, str]=None,
         createdAt: Union[None, int]=None,
+        created: Union[None, "TimeStampClass"]=None,
+        lastModified: Union[None, "TimeStampClass"]=None,
+        trainingJobs: Union[None, List[str]]=None,
         version: Union[None, "VersionTagClass"]=None,
     ):
         super().__init__()

@@ -17726,14 +17730,22 @@ class MLModelGroupPropertiesClass(_Aspect):
             self.customProperties = dict()
         else:
             self.customProperties = customProperties
+        self.name = name
         self.description = description
         self.createdAt = createdAt
+        self.created = created
+        self.lastModified = lastModified
+        self.trainingJobs = trainingJobs
         self.version = version
 
     def _restore_defaults(self) -> None:
         self.customProperties = dict()
+        self.name = self.RECORD_SCHEMA.fields_dict["name"].default
         self.description = self.RECORD_SCHEMA.fields_dict["description"].default
         self.createdAt = self.RECORD_SCHEMA.fields_dict["createdAt"].default
+        self.created = self.RECORD_SCHEMA.fields_dict["created"].default
+        self.lastModified = self.RECORD_SCHEMA.fields_dict["lastModified"].default
+        self.trainingJobs = self.RECORD_SCHEMA.fields_dict["trainingJobs"].default
         self.version = self.RECORD_SCHEMA.fields_dict["version"].default
 
 

@@ -17747,6 +17759,16 @@ class MLModelGroupPropertiesClass(_Aspect):
         self._inner_dict['customProperties'] = value
 
 
+    @property
+    def name(self) -> Union[None, str]:
+        """Display name of the MLModelGroup"""
+        return self._inner_dict.get('name')  # type: ignore
+
+    @name.setter
+    def name(self, value: Union[None, str]) -> None:
+        self._inner_dict['name'] = value
+
+
     @property
     def description(self) -> Union[None, str]:
         """Documentation of the MLModelGroup"""

@@ -17767,6 +17789,36 @@ class MLModelGroupPropertiesClass(_Aspect):
         self._inner_dict['createdAt'] = value
 
 
+    @property
+    def created(self) -> Union[None, "TimeStampClass"]:
+        """Time and Actor who created the MLModelGroup"""
+        return self._inner_dict.get('created')  # type: ignore
+
+    @created.setter
+    def created(self, value: Union[None, "TimeStampClass"]) -> None:
+        self._inner_dict['created'] = value
+
+
+    @property
+    def lastModified(self) -> Union[None, "TimeStampClass"]:
+        """Date when the MLModelGroup was last modified"""
+        return self._inner_dict.get('lastModified')  # type: ignore
+
+    @lastModified.setter
+    def lastModified(self, value: Union[None, "TimeStampClass"]) -> None:
+        self._inner_dict['lastModified'] = value
+
+
+    @property
+    def trainingJobs(self) -> Union[None, List[str]]:
+        """List of jobs (if any) used to train the model group. Visible in Lineage."""
+        return self._inner_dict.get('trainingJobs')  # type: ignore
+
+    @trainingJobs.setter
+    def trainingJobs(self, value: Union[None, List[str]]) -> None:
+        self._inner_dict['trainingJobs'] = value
+
+
     @property
     def version(self) -> Union[None, "VersionTagClass"]:
         """Version of the MLModelGroup"""

@@ -17788,8 +17840,11 @@ class MLModelPropertiesClass(_Aspect):
     def __init__(self,
         customProperties: Optional[Dict[str, str]]=None,
         externalUrl: Union[None, str]=None,
+        name: Union[None, str]=None,
         description: Union[None, str]=None,
         date: Union[None, int]=None,
+        created: Union[None, "TimeStampClass"]=None,
+        lastModified: Union[None, "TimeStampClass"]=None,
         version: Union[None, "VersionTagClass"]=None,
         type: Union[None, str]=None,
         hyperParameters: Union[None, Dict[str, Union[str, int, float, float, bool]]]=None,

@@ -17811,8 +17866,11 @@ class MLModelPropertiesClass(_Aspect):
         else:
             self.customProperties = customProperties
         self.externalUrl = externalUrl
+        self.name = name
         self.description = description
         self.date = date
+        self.created = created
+        self.lastModified = lastModified
         self.version = version
         self.type = type
         self.hyperParameters = hyperParameters

@@ -17833,8 +17891,11 @@ class MLModelPropertiesClass(_Aspect):
     def _restore_defaults(self) -> None:
         self.customProperties = dict()
         self.externalUrl = self.RECORD_SCHEMA.fields_dict["externalUrl"].default
+        self.name = self.RECORD_SCHEMA.fields_dict["name"].default
         self.description = self.RECORD_SCHEMA.fields_dict["description"].default
         self.date = self.RECORD_SCHEMA.fields_dict["date"].default
+        self.created = self.RECORD_SCHEMA.fields_dict["created"].default
+        self.lastModified = self.RECORD_SCHEMA.fields_dict["lastModified"].default
         self.version = self.RECORD_SCHEMA.fields_dict["version"].default
         self.type = self.RECORD_SCHEMA.fields_dict["type"].default
         self.hyperParameters = self.RECORD_SCHEMA.fields_dict["hyperParameters"].default

@@ -17869,6 +17930,16 @@ class MLModelPropertiesClass(_Aspect):
         self._inner_dict['externalUrl'] = value
 
 
+    @property
+    def name(self) -> Union[None, str]:
+        """Display name of the MLModel"""
+        return self._inner_dict.get('name')  # type: ignore
+
+    @name.setter
+    def name(self, value: Union[None, str]) -> None:
+        self._inner_dict['name'] = value
+
+
     @property
     def description(self) -> Union[None, str]:
         """Documentation of the MLModel"""

@@ -17889,6 +17960,26 @@ class MLModelPropertiesClass(_Aspect):
         self._inner_dict['date'] = value
 
 
+    @property
+    def created(self) -> Union[None, "TimeStampClass"]:
+        """Audit stamp containing who created this and when"""
+        return self._inner_dict.get('created')  # type: ignore
+
+    @created.setter
+    def created(self, value: Union[None, "TimeStampClass"]) -> None:
+        self._inner_dict['created'] = value
+
+
+    @property
+    def lastModified(self) -> Union[None, "TimeStampClass"]:
+        """Date when the MLModel was last modified"""
+        return self._inner_dict.get('lastModified')  # type: ignore
+
+    @lastModified.setter
+    def lastModified(self, value: Union[None, "TimeStampClass"]) -> None:
+        self._inner_dict['lastModified'] = value
+
+
     @property
     def version(self) -> Union[None, "VersionTagClass"]:
         """Version of the MLModel"""

@@ -17983,7 +18074,7 @@ class MLModelPropertiesClass(_Aspect):
 
     @property
     def trainingJobs(self) -> Union[None, List[str]]:
-        """List of jobs (if any) used to train the model"""
+        """List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."""
         return self._inner_dict.get('trainingJobs')  # type: ignore
 
     @trainingJobs.setter

@@ -18096,6 +18187,104 @@ class MLPrimaryKeyPropertiesClass(_Aspect):
         self._inner_dict['sources'] = value
 
 
+class MLTrainingRunPropertiesClass(_Aspect):
+    """The inputs and outputs of this training run"""
+
+
+    ASPECT_NAME = 'mlTrainingRunProperties'
+    ASPECT_INFO = {}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLTrainingRunProperties")
+
+    def __init__(self,
+        customProperties: Optional[Dict[str, str]]=None,
+        externalUrl: Union[None, str]=None,
+        id: Union[None, str]=None,
+        outputUrls: Union[None, List[str]]=None,
+        hyperParams: Union[None, List["MLHyperParamClass"]]=None,
+        trainingMetrics: Union[None, List["MLMetricClass"]]=None,
+    ):
+        super().__init__()
+
+        if customProperties is None:
+            # default: {}
+            self.customProperties = dict()
+        else:
+            self.customProperties = customProperties
+        self.externalUrl = externalUrl
+        self.id = id
+        self.outputUrls = outputUrls
+        self.hyperParams = hyperParams
+        self.trainingMetrics = trainingMetrics
+
+    def _restore_defaults(self) -> None:
+        self.customProperties = dict()
+        self.externalUrl = self.RECORD_SCHEMA.fields_dict["externalUrl"].default
+        self.id = self.RECORD_SCHEMA.fields_dict["id"].default
+        self.outputUrls = self.RECORD_SCHEMA.fields_dict["outputUrls"].default
+        self.hyperParams = self.RECORD_SCHEMA.fields_dict["hyperParams"].default
+        self.trainingMetrics = self.RECORD_SCHEMA.fields_dict["trainingMetrics"].default
+
+
+    @property
+    def customProperties(self) -> Dict[str, str]:
+        """Custom property bag."""
+        return self._inner_dict.get('customProperties')  # type: ignore
+
+    @customProperties.setter
+    def customProperties(self, value: Dict[str, str]) -> None:
+        self._inner_dict['customProperties'] = value
+
+
+    @property
+    def externalUrl(self) -> Union[None, str]:
+        """URL where the reference exist"""
+        return self._inner_dict.get('externalUrl')  # type: ignore
+
+    @externalUrl.setter
+    def externalUrl(self, value: Union[None, str]) -> None:
+        self._inner_dict['externalUrl'] = value
+
+
+    @property
+    def id(self) -> Union[None, str]:
+        """Run Id of the ML Training Run"""
+        return self._inner_dict.get('id')  # type: ignore
+
+    @id.setter
+    def id(self, value: Union[None, str]) -> None:
+        self._inner_dict['id'] = value
+
+
+    @property
+    def outputUrls(self) -> Union[None, List[str]]:
+        """List of URLs for the Outputs of the ML Training Run"""
+        return self._inner_dict.get('outputUrls')  # type: ignore
+
+    @outputUrls.setter
+    def outputUrls(self, value: Union[None, List[str]]) -> None:
+        self._inner_dict['outputUrls'] = value
+
+
+    @property
+    def hyperParams(self) -> Union[None, List["MLHyperParamClass"]]:
+        """Hyperparameters of the ML Training Run"""
+        return self._inner_dict.get('hyperParams')  # type: ignore
+
+    @hyperParams.setter
+    def hyperParams(self, value: Union[None, List["MLHyperParamClass"]]) -> None:
+        self._inner_dict['hyperParams'] = value
+
+
+    @property
+    def trainingMetrics(self) -> Union[None, List["MLMetricClass"]]:
+        """Metrics of the ML Training Run"""
+        return self._inner_dict.get('trainingMetrics')  # type: ignore
+
+    @trainingMetrics.setter
+    def trainingMetrics(self, value: Union[None, List["MLMetricClass"]]) -> None:
+        self._inner_dict['trainingMetrics'] = value
+
+
 class MetricsClass(_Aspect):
     """Metrics to be featured for the MLModel."""
 

@@ -24791,6 +24980,7 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.ml.metadata.MLModelGroupProperties': MLModelGroupPropertiesClass,
     'com.linkedin.pegasus2avro.ml.metadata.MLModelProperties': MLModelPropertiesClass,
     'com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties': MLPrimaryKeyPropertiesClass,
+    'com.linkedin.pegasus2avro.ml.metadata.MLTrainingRunProperties': MLTrainingRunPropertiesClass,
     'com.linkedin.pegasus2avro.ml.metadata.Metrics': MetricsClass,
     'com.linkedin.pegasus2avro.ml.metadata.QuantitativeAnalyses': QuantitativeAnalysesClass,
     'com.linkedin.pegasus2avro.ml.metadata.SourceCode': SourceCodeClass,

@@ -25257,6 +25447,7 @@ __SCHEMA_TYPES = {
     'MLModelGroupProperties': MLModelGroupPropertiesClass,
     'MLModelProperties': MLModelPropertiesClass,
     'MLPrimaryKeyProperties': MLPrimaryKeyPropertiesClass,
+    'MLTrainingRunProperties': MLTrainingRunPropertiesClass,
     'Metrics': MetricsClass,
     'QuantitativeAnalyses': QuantitativeAnalysesClass,
     'SourceCode': SourceCodeClass,

@@ -25473,6 +25664,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     MetricsClass,
     EditableMLPrimaryKeyPropertiesClass,
     SourceCodeClass,
+    MLTrainingRunPropertiesClass,
     EthicalConsiderationsClass,
     MLPrimaryKeyPropertiesClass,
     MLModelFactorPromptsClass,

@@ -25686,6 +25878,7 @@ class AspectBag(TypedDict, total=False):
     mlModelMetrics: MetricsClass
     editableMlPrimaryKeyProperties: EditableMLPrimaryKeyPropertiesClass
     sourceCode: SourceCodeClass
+    mlTrainingRunProperties: MLTrainingRunPropertiesClass
     mlModelEthicalConsiderations: EthicalConsiderationsClass
     mlPrimaryKeyProperties: MLPrimaryKeyPropertiesClass
     mlModelFactorPrompts: MLModelFactorPromptsClass
datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py

@@ -30,6 +30,7 @@ from ......schema_classes import MLModelFactorsClass
 from ......schema_classes import MLModelGroupPropertiesClass
 from ......schema_classes import MLModelPropertiesClass
 from ......schema_classes import MLPrimaryKeyPropertiesClass
+from ......schema_classes import MLTrainingRunPropertiesClass
 from ......schema_classes import MetricsClass
 from ......schema_classes import QuantitativeAnalysesClass
 from ......schema_classes import SourceCodeClass

@@ -61,6 +62,7 @@ MLModelFactors = MLModelFactorsClass
 MLModelGroupProperties = MLModelGroupPropertiesClass
 MLModelProperties = MLModelPropertiesClass
 MLPrimaryKeyProperties = MLPrimaryKeyPropertiesClass
+MLTrainingRunProperties = MLTrainingRunPropertiesClass
 Metrics = MetricsClass
 QuantitativeAnalyses = QuantitativeAnalysesClass
 SourceCode = SourceCodeClass