acryl-datahub 0.15.0rc24__py3-none-any.whl → 0.15.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/METADATA +2408 -2412
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/RECORD +116 -106
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/entry_points.txt +1 -1
- datahub/__init__.py +1 -1
- datahub/api/circuit_breaker/assertion_circuit_breaker.py +5 -4
- datahub/api/entities/structuredproperties/structuredproperties.py +20 -8
- datahub/configuration/common.py +2 -5
- datahub/configuration/source_common.py +13 -0
- datahub/emitter/mce_builder.py +20 -4
- datahub/emitter/mcp_builder.py +2 -7
- datahub/emitter/mcp_patch_builder.py +37 -13
- datahub/emitter/rest_emitter.py +25 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +98 -0
- datahub/ingestion/api/closeable.py +3 -3
- datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +4 -7
- datahub/ingestion/api/report.py +4 -1
- datahub/ingestion/api/sink.py +4 -3
- datahub/ingestion/api/source.py +4 -0
- datahub/ingestion/api/source_helpers.py +2 -6
- datahub/ingestion/glossary/classifier.py +2 -3
- datahub/ingestion/graph/client.py +6 -3
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
- datahub/ingestion/source/aws/aws_common.py +231 -27
- datahub/ingestion/source/aws/glue.py +12 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -20
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
- datahub/ingestion/source/datahub/config.py +22 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +3 -17
- datahub/ingestion/source/datahub/datahub_kafka_reader.py +2 -1
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -3
- datahub/ingestion/source/gc/datahub_gc.py +21 -5
- datahub/ingestion/source/gc/dataprocess_cleanup.py +23 -10
- datahub/ingestion/source/gc/execution_request_cleanup.py +61 -16
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +178 -83
- datahub/ingestion/source/iceberg/iceberg.py +27 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +4 -0
- datahub/ingestion/source/kafka_connect/__init__.py +0 -0
- datahub/ingestion/source/kafka_connect/common.py +202 -0
- datahub/ingestion/source/kafka_connect/kafka_connect.py +367 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +341 -0
- datahub/ingestion/source/kafka_connect/source_connectors.py +570 -0
- datahub/ingestion/source/looker/looker_common.py +63 -2
- datahub/ingestion/source/looker/looker_dataclasses.py +7 -9
- datahub/ingestion/source/looker/looker_lib_wrapper.py +13 -1
- datahub/ingestion/source/looker/looker_source.py +31 -4
- datahub/ingestion/source/looker/looker_usage.py +23 -17
- datahub/ingestion/source/mlflow.py +30 -5
- datahub/ingestion/source/mode.py +40 -27
- datahub/ingestion/source/powerbi/config.py +1 -14
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +1 -1
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +1 -1
- datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +16 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +16 -15
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -34
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +14 -64
- datahub/ingestion/source/snowflake/snowflake_queries.py +44 -14
- datahub/ingestion/source/snowflake/snowflake_query.py +5 -10
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +53 -7
- datahub/ingestion/source/snowflake/snowflake_shares.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +2 -5
- datahub/ingestion/source/snowflake/snowflake_utils.py +22 -18
- datahub/ingestion/source/snowflake/snowflake_v2.py +38 -34
- datahub/ingestion/source/sql/hive.py +621 -8
- datahub/ingestion/source/sql/hive_metastore.py +7 -0
- datahub/ingestion/source/sql/mssql/job_models.py +30 -1
- datahub/ingestion/source/sql/mssql/source.py +15 -1
- datahub/ingestion/source/sql/sql_common.py +41 -102
- datahub/ingestion/source/sql/sql_generic_profiler.py +5 -6
- datahub/ingestion/source/sql/sql_report.py +2 -0
- datahub/ingestion/source/state/checkpoint.py +2 -1
- datahub/ingestion/source/tableau/tableau.py +122 -45
- datahub/ingestion/source/tableau/tableau_common.py +18 -0
- datahub/ingestion/source/tableau/tableau_constant.py +3 -1
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +6 -2
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/proxy.py +8 -27
- datahub/ingestion/source/usage/usage_common.py +15 -1
- datahub/ingestion/source_report/ingestion_stage.py +3 -0
- datahub/metadata/_schema_classes.py +256 -3
- datahub/metadata/_urns/urn_defs.py +168 -168
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py +2 -0
- datahub/metadata/schema.avsc +252 -33
- datahub/metadata/schemas/DataJobKey.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceKey.avsc +5 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +82 -0
- datahub/metadata/schemas/MLModelProperties.avsc +62 -2
- datahub/metadata/schemas/MLTrainingRunProperties.avsc +171 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +94 -2
- datahub/specific/aspect_helpers/__init__.py +0 -0
- datahub/specific/aspect_helpers/custom_properties.py +79 -0
- datahub/specific/aspect_helpers/ownership.py +67 -0
- datahub/specific/aspect_helpers/structured_properties.py +72 -0
- datahub/specific/aspect_helpers/tags.py +42 -0
- datahub/specific/aspect_helpers/terms.py +43 -0
- datahub/specific/chart.py +28 -184
- datahub/specific/dashboard.py +31 -196
- datahub/specific/datajob.py +34 -189
- datahub/specific/dataproduct.py +24 -86
- datahub/specific/dataset.py +48 -133
- datahub/specific/form.py +12 -32
- datahub/specific/structured_property.py +9 -9
- datahub/sql_parsing/sql_parsing_aggregator.py +10 -9
- datahub/sql_parsing/sqlglot_lineage.py +15 -5
- datahub/sql_parsing/tool_meta_extractor.py +119 -5
- datahub/utilities/time.py +8 -3
- datahub/utilities/urns/_urn_base.py +5 -7
- datahub/ingestion/source/kafka/kafka_connect.py +0 -1468
- datahub/specific/custom_properties.py +0 -37
- datahub/specific/ownership.py +0 -48
- datahub/specific/structured_properties.py +0 -53
- {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1.dist-info}/top_level.txt +0 -0
|
@@ -19,6 +19,8 @@ from .....schema_classes import CostCostClass
|
|
|
19
19
|
from .....schema_classes import CostCostDiscriminatorClass
|
|
20
20
|
from .....schema_classes import CostTypeClass
|
|
21
21
|
from .....schema_classes import DataPlatformInstanceClass
|
|
22
|
+
from .....schema_classes import DataTransformClass
|
|
23
|
+
from .....schema_classes import DataTransformLogicClass
|
|
22
24
|
from .....schema_classes import DeprecationClass
|
|
23
25
|
from .....schema_classes import DocumentationClass
|
|
24
26
|
from .....schema_classes import DocumentationAssociationClass
|
|
@@ -79,6 +81,8 @@ CostCost = CostCostClass
|
|
|
79
81
|
CostCostDiscriminator = CostCostDiscriminatorClass
|
|
80
82
|
CostType = CostTypeClass
|
|
81
83
|
DataPlatformInstance = DataPlatformInstanceClass
|
|
84
|
+
DataTransform = DataTransformClass
|
|
85
|
+
DataTransformLogic = DataTransformLogicClass
|
|
82
86
|
Deprecation = DeprecationClass
|
|
83
87
|
Documentation = DocumentationClass
|
|
84
88
|
DocumentationAssociation = DocumentationAssociationClass
|
|
@@ -30,6 +30,7 @@ from ......schema_classes import MLModelFactorsClass
|
|
|
30
30
|
from ......schema_classes import MLModelGroupPropertiesClass
|
|
31
31
|
from ......schema_classes import MLModelPropertiesClass
|
|
32
32
|
from ......schema_classes import MLPrimaryKeyPropertiesClass
|
|
33
|
+
from ......schema_classes import MLTrainingRunPropertiesClass
|
|
33
34
|
from ......schema_classes import MetricsClass
|
|
34
35
|
from ......schema_classes import QuantitativeAnalysesClass
|
|
35
36
|
from ......schema_classes import SourceCodeClass
|
|
@@ -61,6 +62,7 @@ MLModelFactors = MLModelFactorsClass
|
|
|
61
62
|
MLModelGroupProperties = MLModelGroupPropertiesClass
|
|
62
63
|
MLModelProperties = MLModelPropertiesClass
|
|
63
64
|
MLPrimaryKeyProperties = MLPrimaryKeyPropertiesClass
|
|
65
|
+
MLTrainingRunProperties = MLTrainingRunPropertiesClass
|
|
64
66
|
Metrics = MetricsClass
|
|
65
67
|
QuantitativeAnalyses = QuantitativeAnalysesClass
|
|
66
68
|
SourceCode = SourceCodeClass
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -400,6 +400,69 @@
|
|
|
400
400
|
],
|
|
401
401
|
"doc": "Institutional memory of an entity. This is a way to link to relevant documentation and provide description of the documentation. Institutional or tribal knowledge is very important for users to leverage the entity."
|
|
402
402
|
},
|
|
403
|
+
{
|
|
404
|
+
"type": "record",
|
|
405
|
+
"Aspect": {
|
|
406
|
+
"name": "dataTransformLogic"
|
|
407
|
+
},
|
|
408
|
+
"name": "DataTransformLogic",
|
|
409
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
410
|
+
"fields": [
|
|
411
|
+
{
|
|
412
|
+
"type": {
|
|
413
|
+
"type": "array",
|
|
414
|
+
"items": {
|
|
415
|
+
"type": "record",
|
|
416
|
+
"name": "DataTransform",
|
|
417
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
418
|
+
"fields": [
|
|
419
|
+
{
|
|
420
|
+
"type": [
|
|
421
|
+
"null",
|
|
422
|
+
{
|
|
423
|
+
"type": "record",
|
|
424
|
+
"name": "QueryStatement",
|
|
425
|
+
"namespace": "com.linkedin.pegasus2avro.query",
|
|
426
|
+
"fields": [
|
|
427
|
+
{
|
|
428
|
+
"type": "string",
|
|
429
|
+
"name": "value",
|
|
430
|
+
"doc": "The query text"
|
|
431
|
+
},
|
|
432
|
+
{
|
|
433
|
+
"type": {
|
|
434
|
+
"type": "enum",
|
|
435
|
+
"symbolDocs": {
|
|
436
|
+
"SQL": "A SQL Query"
|
|
437
|
+
},
|
|
438
|
+
"name": "QueryLanguage",
|
|
439
|
+
"namespace": "com.linkedin.pegasus2avro.query",
|
|
440
|
+
"symbols": [
|
|
441
|
+
"SQL"
|
|
442
|
+
]
|
|
443
|
+
},
|
|
444
|
+
"name": "language",
|
|
445
|
+
"default": "SQL",
|
|
446
|
+
"doc": "The language of the Query, e.g. SQL."
|
|
447
|
+
}
|
|
448
|
+
],
|
|
449
|
+
"doc": "A query statement against one or more data assets."
|
|
450
|
+
}
|
|
451
|
+
],
|
|
452
|
+
"name": "queryStatement",
|
|
453
|
+
"default": null,
|
|
454
|
+
"doc": "The data transform may be defined by a query statement"
|
|
455
|
+
}
|
|
456
|
+
],
|
|
457
|
+
"doc": "Information about a transformation. It may be a query."
|
|
458
|
+
}
|
|
459
|
+
},
|
|
460
|
+
"name": "transforms",
|
|
461
|
+
"doc": "List of transformations applied"
|
|
462
|
+
}
|
|
463
|
+
],
|
|
464
|
+
"doc": "Information about the transformations applied; each transform may be defined by a query statement."
|
|
465
|
+
},
|
|
403
466
|
{
|
|
404
467
|
"type": "record",
|
|
405
468
|
"Aspect": {
|
|
@@ -6827,6 +6890,21 @@
|
|
|
6827
6890
|
"default": null,
|
|
6828
6891
|
"doc": "URL where the reference exists"
|
|
6829
6892
|
},
|
|
6893
|
+
{
|
|
6894
|
+
"Searchable": {
|
|
6895
|
+
"boostScore": 10.0,
|
|
6896
|
+
"enableAutocomplete": true,
|
|
6897
|
+
"fieldType": "WORD_GRAM",
|
|
6898
|
+
"queryByDefault": true
|
|
6899
|
+
},
|
|
6900
|
+
"type": [
|
|
6901
|
+
"null",
|
|
6902
|
+
"string"
|
|
6903
|
+
],
|
|
6904
|
+
"name": "name",
|
|
6905
|
+
"default": null,
|
|
6906
|
+
"doc": "Display name of the MLModel"
|
|
6907
|
+
},
|
|
6830
6908
|
{
|
|
6831
6909
|
"Searchable": {
|
|
6832
6910
|
"fieldType": "TEXT",
|
|
@@ -6841,6 +6919,7 @@
|
|
|
6841
6919
|
"doc": "Documentation of the MLModel"
|
|
6842
6920
|
},
|
|
6843
6921
|
{
|
|
6922
|
+
"deprecated": true,
|
|
6844
6923
|
"type": [
|
|
6845
6924
|
"null",
|
|
6846
6925
|
"long"
|
|
@@ -6849,6 +6928,24 @@
|
|
|
6849
6928
|
"default": null,
|
|
6850
6929
|
"doc": "Date when the MLModel was developed"
|
|
6851
6930
|
},
|
|
6931
|
+
{
|
|
6932
|
+
"type": [
|
|
6933
|
+
"null",
|
|
6934
|
+
"com.linkedin.pegasus2avro.common.TimeStamp"
|
|
6935
|
+
],
|
|
6936
|
+
"name": "created",
|
|
6937
|
+
"default": null,
|
|
6938
|
+
"doc": "Audit stamp containing who created this and when"
|
|
6939
|
+
},
|
|
6940
|
+
{
|
|
6941
|
+
"type": [
|
|
6942
|
+
"null",
|
|
6943
|
+
"com.linkedin.pegasus2avro.common.TimeStamp"
|
|
6944
|
+
],
|
|
6945
|
+
"name": "lastModified",
|
|
6946
|
+
"default": null,
|
|
6947
|
+
"doc": "Date when the MLModel was last modified"
|
|
6948
|
+
},
|
|
6852
6949
|
{
|
|
6853
6950
|
"type": [
|
|
6854
6951
|
"null",
|
|
@@ -7081,7 +7178,8 @@
|
|
|
7081
7178
|
"Relationship": {
|
|
7082
7179
|
"/*": {
|
|
7083
7180
|
"entityTypes": [
|
|
7084
|
-
"dataJob"
|
|
7181
|
+
"dataJob",
|
|
7182
|
+
"dataProcessInstance"
|
|
7085
7183
|
],
|
|
7086
7184
|
"isLineage": true,
|
|
7087
7185
|
"name": "TrainedBy"
|
|
@@ -7098,7 +7196,7 @@
|
|
|
7098
7196
|
],
|
|
7099
7197
|
"name": "trainingJobs",
|
|
7100
7198
|
"default": null,
|
|
7101
|
-
"doc": "List of jobs (if any) used to train the model"
|
|
7199
|
+
"doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of specific Data Process Instances (runs) via the DataProcessInstanceOutput aspect."
|
|
7102
7200
|
},
|
|
7103
7201
|
{
|
|
7104
7202
|
"Relationship": {
|
|
@@ -8415,6 +8513,21 @@
|
|
|
8415
8513
|
"default": {},
|
|
8416
8514
|
"doc": "Custom property bag."
|
|
8417
8515
|
},
|
|
8516
|
+
{
|
|
8517
|
+
"Searchable": {
|
|
8518
|
+
"boostScore": 10.0,
|
|
8519
|
+
"enableAutocomplete": true,
|
|
8520
|
+
"fieldType": "WORD_GRAM",
|
|
8521
|
+
"queryByDefault": true
|
|
8522
|
+
},
|
|
8523
|
+
"type": [
|
|
8524
|
+
"null",
|
|
8525
|
+
"string"
|
|
8526
|
+
],
|
|
8527
|
+
"name": "name",
|
|
8528
|
+
"default": null,
|
|
8529
|
+
"doc": "Display name of the MLModelGroup"
|
|
8530
|
+
},
|
|
8418
8531
|
{
|
|
8419
8532
|
"Searchable": {
|
|
8420
8533
|
"fieldType": "TEXT",
|
|
@@ -8429,6 +8542,7 @@
|
|
|
8429
8542
|
"doc": "Documentation of the MLModelGroup"
|
|
8430
8543
|
},
|
|
8431
8544
|
{
|
|
8545
|
+
"deprecated": true,
|
|
8432
8546
|
"type": [
|
|
8433
8547
|
"null",
|
|
8434
8548
|
"long"
|
|
@@ -8437,6 +8551,47 @@
|
|
|
8437
8551
|
"default": null,
|
|
8438
8552
|
"doc": "Date when the MLModelGroup was developed"
|
|
8439
8553
|
},
|
|
8554
|
+
{
|
|
8555
|
+
"type": [
|
|
8556
|
+
"null",
|
|
8557
|
+
"com.linkedin.pegasus2avro.common.TimeStamp"
|
|
8558
|
+
],
|
|
8559
|
+
"name": "created",
|
|
8560
|
+
"default": null,
|
|
8561
|
+
"doc": "Time and Actor who created the MLModelGroup"
|
|
8562
|
+
},
|
|
8563
|
+
{
|
|
8564
|
+
"type": [
|
|
8565
|
+
"null",
|
|
8566
|
+
"com.linkedin.pegasus2avro.common.TimeStamp"
|
|
8567
|
+
],
|
|
8568
|
+
"name": "lastModified",
|
|
8569
|
+
"default": null,
|
|
8570
|
+
"doc": "Date when the MLModelGroup was last modified"
|
|
8571
|
+
},
|
|
8572
|
+
{
|
|
8573
|
+
"Relationship": {
|
|
8574
|
+
"/*": {
|
|
8575
|
+
"entityTypes": [
|
|
8576
|
+
"dataJob"
|
|
8577
|
+
],
|
|
8578
|
+
"isLineage": true,
|
|
8579
|
+
"name": "TrainedBy"
|
|
8580
|
+
}
|
|
8581
|
+
},
|
|
8582
|
+
"Urn": "Urn",
|
|
8583
|
+
"urn_is_array": true,
|
|
8584
|
+
"type": [
|
|
8585
|
+
"null",
|
|
8586
|
+
{
|
|
8587
|
+
"type": "array",
|
|
8588
|
+
"items": "string"
|
|
8589
|
+
}
|
|
8590
|
+
],
|
|
8591
|
+
"name": "trainingJobs",
|
|
8592
|
+
"default": null,
|
|
8593
|
+
"doc": "List of jobs (if any) used to train the model group. Visible in Lineage."
|
|
8594
|
+
},
|
|
8440
8595
|
{
|
|
8441
8596
|
"type": [
|
|
8442
8597
|
"null",
|
|
@@ -11855,35 +12010,7 @@
|
|
|
11855
12010
|
"namespace": "com.linkedin.pegasus2avro.query",
|
|
11856
12011
|
"fields": [
|
|
11857
12012
|
{
|
|
11858
|
-
"type":
|
|
11859
|
-
"type": "record",
|
|
11860
|
-
"name": "QueryStatement",
|
|
11861
|
-
"namespace": "com.linkedin.pegasus2avro.query",
|
|
11862
|
-
"fields": [
|
|
11863
|
-
{
|
|
11864
|
-
"type": "string",
|
|
11865
|
-
"name": "value",
|
|
11866
|
-
"doc": "The query text"
|
|
11867
|
-
},
|
|
11868
|
-
{
|
|
11869
|
-
"type": {
|
|
11870
|
-
"type": "enum",
|
|
11871
|
-
"symbolDocs": {
|
|
11872
|
-
"SQL": "A SQL Query"
|
|
11873
|
-
},
|
|
11874
|
-
"name": "QueryLanguage",
|
|
11875
|
-
"namespace": "com.linkedin.pegasus2avro.query",
|
|
11876
|
-
"symbols": [
|
|
11877
|
-
"SQL"
|
|
11878
|
-
]
|
|
11879
|
-
},
|
|
11880
|
-
"name": "language",
|
|
11881
|
-
"default": "SQL",
|
|
11882
|
-
"doc": "The language of the Query, e.g. SQL."
|
|
11883
|
-
}
|
|
11884
|
-
],
|
|
11885
|
-
"doc": "A query statement against one or more data assets."
|
|
11886
|
-
},
|
|
12013
|
+
"type": "com.linkedin.pegasus2avro.query.QueryStatement",
|
|
11887
12014
|
"name": "statement",
|
|
11888
12015
|
"doc": "The Query Statement."
|
|
11889
12016
|
},
|
|
@@ -12619,7 +12746,8 @@
|
|
|
12619
12746
|
"Relationship": {
|
|
12620
12747
|
"/*": {
|
|
12621
12748
|
"entityTypes": [
|
|
12622
|
-
"dataset"
|
|
12749
|
+
"dataset",
|
|
12750
|
+
"mlModel"
|
|
12623
12751
|
],
|
|
12624
12752
|
"name": "Produces"
|
|
12625
12753
|
}
|
|
@@ -12944,6 +13072,93 @@
|
|
|
12944
13072
|
"doc": "Properties associated with a MLPrimaryKey editable from the UI"
|
|
12945
13073
|
},
|
|
12946
13074
|
"com.linkedin.pegasus2avro.ml.metadata.SourceCode",
|
|
13075
|
+
{
|
|
13076
|
+
"type": "record",
|
|
13077
|
+
"Aspect": {
|
|
13078
|
+
"name": "mlTrainingRunProperties"
|
|
13079
|
+
},
|
|
13080
|
+
"name": "MLTrainingRunProperties",
|
|
13081
|
+
"namespace": "com.linkedin.pegasus2avro.ml.metadata",
|
|
13082
|
+
"fields": [
|
|
13083
|
+
{
|
|
13084
|
+
"Searchable": {
|
|
13085
|
+
"/*": {
|
|
13086
|
+
"fieldType": "TEXT",
|
|
13087
|
+
"queryByDefault": true
|
|
13088
|
+
}
|
|
13089
|
+
},
|
|
13090
|
+
"type": {
|
|
13091
|
+
"type": "map",
|
|
13092
|
+
"values": "string"
|
|
13093
|
+
},
|
|
13094
|
+
"name": "customProperties",
|
|
13095
|
+
"default": {},
|
|
13096
|
+
"doc": "Custom property bag."
|
|
13097
|
+
},
|
|
13098
|
+
{
|
|
13099
|
+
"Searchable": {
|
|
13100
|
+
"fieldType": "KEYWORD"
|
|
13101
|
+
},
|
|
13102
|
+
"java": {
|
|
13103
|
+
"class": "com.linkedin.pegasus2avro.common.url.Url",
|
|
13104
|
+
"coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer"
|
|
13105
|
+
},
|
|
13106
|
+
"type": [
|
|
13107
|
+
"null",
|
|
13108
|
+
"string"
|
|
13109
|
+
],
|
|
13110
|
+
"name": "externalUrl",
|
|
13111
|
+
"default": null,
|
|
13112
|
+
"doc": "URL where the reference exists"
|
|
13113
|
+
},
|
|
13114
|
+
{
|
|
13115
|
+
"type": [
|
|
13116
|
+
"null",
|
|
13117
|
+
"string"
|
|
13118
|
+
],
|
|
13119
|
+
"name": "id",
|
|
13120
|
+
"default": null,
|
|
13121
|
+
"doc": "Run Id of the ML Training Run"
|
|
13122
|
+
},
|
|
13123
|
+
{
|
|
13124
|
+
"type": [
|
|
13125
|
+
"null",
|
|
13126
|
+
{
|
|
13127
|
+
"type": "array",
|
|
13128
|
+
"items": "string"
|
|
13129
|
+
}
|
|
13130
|
+
],
|
|
13131
|
+
"name": "outputUrls",
|
|
13132
|
+
"default": null,
|
|
13133
|
+
"doc": "List of URLs for the Outputs of the ML Training Run"
|
|
13134
|
+
},
|
|
13135
|
+
{
|
|
13136
|
+
"type": [
|
|
13137
|
+
"null",
|
|
13138
|
+
{
|
|
13139
|
+
"type": "array",
|
|
13140
|
+
"items": "com.linkedin.pegasus2avro.ml.metadata.MLHyperParam"
|
|
13141
|
+
}
|
|
13142
|
+
],
|
|
13143
|
+
"name": "hyperParams",
|
|
13144
|
+
"default": null,
|
|
13145
|
+
"doc": "Hyperparameters of the ML Training Run"
|
|
13146
|
+
},
|
|
13147
|
+
{
|
|
13148
|
+
"type": [
|
|
13149
|
+
"null",
|
|
13150
|
+
{
|
|
13151
|
+
"type": "array",
|
|
13152
|
+
"items": "com.linkedin.pegasus2avro.ml.metadata.MLMetric"
|
|
13153
|
+
}
|
|
13154
|
+
],
|
|
13155
|
+
"name": "trainingMetrics",
|
|
13156
|
+
"default": null,
|
|
13157
|
+
"doc": "Metrics of the ML Training Run"
|
|
13158
|
+
}
|
|
13159
|
+
],
|
|
13160
|
+
"doc": "The inputs and outputs of this training run"
|
|
13161
|
+
},
|
|
12947
13162
|
"com.linkedin.pegasus2avro.ml.metadata.EthicalConsiderations",
|
|
12948
13163
|
"com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties",
|
|
12949
13164
|
"com.linkedin.pegasus2avro.ml.metadata.MLModelFactorPrompts",
|
|
@@ -15561,7 +15776,11 @@
|
|
|
15561
15776
|
"dataProcessInstanceRelationships",
|
|
15562
15777
|
"dataProcessInstanceRunEvent",
|
|
15563
15778
|
"status",
|
|
15564
|
-
"testResults"
|
|
15779
|
+
"testResults",
|
|
15780
|
+
"dataPlatformInstance",
|
|
15781
|
+
"subTypes",
|
|
15782
|
+
"container",
|
|
15783
|
+
"mlTrainingRunProperties"
|
|
15565
15784
|
],
|
|
15566
15785
|
"entityDoc": "DataProcessInstance represents an instance of a datajob/jobflow run"
|
|
15567
15786
|
},
|
|
@@ -11,7 +11,11 @@
|
|
|
11
11
|
"dataProcessInstanceRelationships",
|
|
12
12
|
"dataProcessInstanceRunEvent",
|
|
13
13
|
"status",
|
|
14
|
-
"testResults"
|
|
14
|
+
"testResults",
|
|
15
|
+
"dataPlatformInstance",
|
|
16
|
+
"subTypes",
|
|
17
|
+
"container",
|
|
18
|
+
"mlTrainingRunProperties"
|
|
15
19
|
],
|
|
16
20
|
"entityDoc": "DataProcessInstance represents an instance of a datajob/jobflow run"
|
|
17
21
|
},
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "dataTransformLogic"
|
|
5
|
+
},
|
|
6
|
+
"name": "DataTransformLogic",
|
|
7
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
8
|
+
"fields": [
|
|
9
|
+
{
|
|
10
|
+
"type": {
|
|
11
|
+
"type": "array",
|
|
12
|
+
"items": {
|
|
13
|
+
"type": "record",
|
|
14
|
+
"name": "DataTransform",
|
|
15
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
16
|
+
"fields": [
|
|
17
|
+
{
|
|
18
|
+
"type": [
|
|
19
|
+
"null",
|
|
20
|
+
{
|
|
21
|
+
"type": "record",
|
|
22
|
+
"name": "QueryStatement",
|
|
23
|
+
"namespace": "com.linkedin.pegasus2avro.query",
|
|
24
|
+
"fields": [
|
|
25
|
+
{
|
|
26
|
+
"type": "string",
|
|
27
|
+
"name": "value",
|
|
28
|
+
"doc": "The query text"
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"type": {
|
|
32
|
+
"type": "enum",
|
|
33
|
+
"symbolDocs": {
|
|
34
|
+
"SQL": "A SQL Query"
|
|
35
|
+
},
|
|
36
|
+
"name": "QueryLanguage",
|
|
37
|
+
"namespace": "com.linkedin.pegasus2avro.query",
|
|
38
|
+
"symbols": [
|
|
39
|
+
"SQL"
|
|
40
|
+
]
|
|
41
|
+
},
|
|
42
|
+
"name": "language",
|
|
43
|
+
"default": "SQL",
|
|
44
|
+
"doc": "The language of the Query, e.g. SQL."
|
|
45
|
+
}
|
|
46
|
+
],
|
|
47
|
+
"doc": "A query statement against one or more data assets."
|
|
48
|
+
}
|
|
49
|
+
],
|
|
50
|
+
"name": "queryStatement",
|
|
51
|
+
"default": null,
|
|
52
|
+
"doc": "The data transform may be defined by a query statement"
|
|
53
|
+
}
|
|
54
|
+
],
|
|
55
|
+
"doc": "Information about a transformation. It may be a query."
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
"name": "transforms",
|
|
59
|
+
"doc": "List of transformations applied"
|
|
60
|
+
}
|
|
61
|
+
],
|
|
62
|
+
"doc": "Information about the transformations applied; each transform may be defined by a query statement."
|
|
63
|
+
}
|
|
@@ -21,6 +21,21 @@
|
|
|
21
21
|
"default": {},
|
|
22
22
|
"doc": "Custom property bag."
|
|
23
23
|
},
|
|
24
|
+
{
|
|
25
|
+
"Searchable": {
|
|
26
|
+
"boostScore": 10.0,
|
|
27
|
+
"enableAutocomplete": true,
|
|
28
|
+
"fieldType": "WORD_GRAM",
|
|
29
|
+
"queryByDefault": true
|
|
30
|
+
},
|
|
31
|
+
"type": [
|
|
32
|
+
"null",
|
|
33
|
+
"string"
|
|
34
|
+
],
|
|
35
|
+
"name": "name",
|
|
36
|
+
"default": null,
|
|
37
|
+
"doc": "Display name of the MLModelGroup"
|
|
38
|
+
},
|
|
24
39
|
{
|
|
25
40
|
"Searchable": {
|
|
26
41
|
"fieldType": "TEXT",
|
|
@@ -35,6 +50,7 @@
|
|
|
35
50
|
"doc": "Documentation of the MLModelGroup"
|
|
36
51
|
},
|
|
37
52
|
{
|
|
53
|
+
"deprecated": true,
|
|
38
54
|
"type": [
|
|
39
55
|
"null",
|
|
40
56
|
"long"
|
|
@@ -43,6 +59,72 @@
|
|
|
43
59
|
"default": null,
|
|
44
60
|
"doc": "Date when the MLModelGroup was developed"
|
|
45
61
|
},
|
|
62
|
+
{
|
|
63
|
+
"type": [
|
|
64
|
+
"null",
|
|
65
|
+
{
|
|
66
|
+
"type": "record",
|
|
67
|
+
"name": "TimeStamp",
|
|
68
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
69
|
+
"fields": [
|
|
70
|
+
{
|
|
71
|
+
"type": "long",
|
|
72
|
+
"name": "time",
|
|
73
|
+
"doc": "When did the event occur"
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"java": {
|
|
77
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
78
|
+
},
|
|
79
|
+
"type": [
|
|
80
|
+
"null",
|
|
81
|
+
"string"
|
|
82
|
+
],
|
|
83
|
+
"name": "actor",
|
|
84
|
+
"default": null,
|
|
85
|
+
"doc": "Optional: The actor urn involved in the event.",
|
|
86
|
+
"Urn": "Urn"
|
|
87
|
+
}
|
|
88
|
+
],
|
|
89
|
+
"doc": "A standard event timestamp"
|
|
90
|
+
}
|
|
91
|
+
],
|
|
92
|
+
"name": "created",
|
|
93
|
+
"default": null,
|
|
94
|
+
"doc": "Time and Actor who created the MLModelGroup"
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"type": [
|
|
98
|
+
"null",
|
|
99
|
+
"com.linkedin.pegasus2avro.common.TimeStamp"
|
|
100
|
+
],
|
|
101
|
+
"name": "lastModified",
|
|
102
|
+
"default": null,
|
|
103
|
+
"doc": "Date when the MLModelGroup was last modified"
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
"Relationship": {
|
|
107
|
+
"/*": {
|
|
108
|
+
"entityTypes": [
|
|
109
|
+
"dataJob"
|
|
110
|
+
],
|
|
111
|
+
"isLineage": true,
|
|
112
|
+
"name": "TrainedBy"
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
"type": [
|
|
116
|
+
"null",
|
|
117
|
+
{
|
|
118
|
+
"type": "array",
|
|
119
|
+
"items": "string"
|
|
120
|
+
}
|
|
121
|
+
],
|
|
122
|
+
"name": "trainingJobs",
|
|
123
|
+
"default": null,
|
|
124
|
+
"doc": "List of jobs (if any) used to train the model group. Visible in Lineage.",
|
|
125
|
+
"Urn": "Urn",
|
|
126
|
+
"urn_is_array": true
|
|
127
|
+
},
|
|
46
128
|
{
|
|
47
129
|
"type": [
|
|
48
130
|
"null",
|