acryl-datahub 0.15.0rc25__py3-none-any.whl → 0.15.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (120)
  1. {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/METADATA +2236 -2240
  2. {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/RECORD +116 -106
  3. {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/entry_points.txt +1 -1
  5. datahub/__init__.py +1 -1
  6. datahub/api/circuit_breaker/assertion_circuit_breaker.py +5 -4
  7. datahub/api/entities/structuredproperties/structuredproperties.py +20 -8
  8. datahub/configuration/common.py +2 -5
  9. datahub/configuration/source_common.py +13 -0
  10. datahub/emitter/mce_builder.py +20 -4
  11. datahub/emitter/mcp_builder.py +2 -7
  12. datahub/emitter/mcp_patch_builder.py +37 -13
  13. datahub/emitter/rest_emitter.py +25 -3
  14. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +98 -0
  15. datahub/ingestion/api/closeable.py +3 -3
  16. datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +4 -7
  17. datahub/ingestion/api/report.py +4 -1
  18. datahub/ingestion/api/sink.py +4 -3
  19. datahub/ingestion/api/source.py +4 -0
  20. datahub/ingestion/api/source_helpers.py +2 -6
  21. datahub/ingestion/glossary/classifier.py +2 -3
  22. datahub/ingestion/graph/client.py +6 -3
  23. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
  24. datahub/ingestion/source/aws/aws_common.py +231 -27
  25. datahub/ingestion/source/aws/glue.py +12 -2
  26. datahub/ingestion/source/bigquery_v2/bigquery.py +10 -18
  27. datahub/ingestion/source/bigquery_v2/bigquery_config.py +3 -9
  28. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -20
  29. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -17
  30. datahub/ingestion/source/bigquery_v2/lineage.py +9 -22
  31. datahub/ingestion/source/datahub/config.py +22 -1
  32. datahub/ingestion/source/datahub/datahub_database_reader.py +3 -17
  33. datahub/ingestion/source/datahub/datahub_kafka_reader.py +2 -1
  34. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  35. datahub/ingestion/source/dbt/dbt_cloud.py +10 -3
  36. datahub/ingestion/source/gc/datahub_gc.py +21 -5
  37. datahub/ingestion/source/gc/dataprocess_cleanup.py +23 -10
  38. datahub/ingestion/source/gc/execution_request_cleanup.py +61 -16
  39. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +178 -83
  40. datahub/ingestion/source/iceberg/iceberg.py +27 -1
  41. datahub/ingestion/source/iceberg/iceberg_common.py +4 -0
  42. datahub/ingestion/source/kafka_connect/__init__.py +0 -0
  43. datahub/ingestion/source/kafka_connect/common.py +202 -0
  44. datahub/ingestion/source/kafka_connect/kafka_connect.py +367 -0
  45. datahub/ingestion/source/kafka_connect/sink_connectors.py +341 -0
  46. datahub/ingestion/source/kafka_connect/source_connectors.py +570 -0
  47. datahub/ingestion/source/looker/looker_common.py +63 -2
  48. datahub/ingestion/source/looker/looker_dataclasses.py +7 -9
  49. datahub/ingestion/source/looker/looker_lib_wrapper.py +13 -1
  50. datahub/ingestion/source/looker/looker_source.py +31 -4
  51. datahub/ingestion/source/looker/looker_usage.py +23 -17
  52. datahub/ingestion/source/mlflow.py +30 -5
  53. datahub/ingestion/source/mode.py +40 -27
  54. datahub/ingestion/source/powerbi/config.py +1 -14
  55. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +1 -1
  56. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +1 -1
  57. datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +16 -2
  58. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +16 -15
  59. datahub/ingestion/source/s3/source.py +1 -1
  60. datahub/ingestion/source/snowflake/snowflake_config.py +13 -34
  61. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +14 -64
  62. datahub/ingestion/source/snowflake/snowflake_queries.py +44 -14
  63. datahub/ingestion/source/snowflake/snowflake_query.py +5 -10
  64. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +53 -7
  65. datahub/ingestion/source/snowflake/snowflake_shares.py +1 -1
  66. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +2 -5
  67. datahub/ingestion/source/snowflake/snowflake_utils.py +22 -18
  68. datahub/ingestion/source/snowflake/snowflake_v2.py +38 -34
  69. datahub/ingestion/source/sql/hive.py +621 -8
  70. datahub/ingestion/source/sql/hive_metastore.py +7 -0
  71. datahub/ingestion/source/sql/mssql/job_models.py +30 -1
  72. datahub/ingestion/source/sql/mssql/source.py +15 -1
  73. datahub/ingestion/source/sql/sql_common.py +41 -102
  74. datahub/ingestion/source/sql/sql_generic_profiler.py +5 -6
  75. datahub/ingestion/source/sql/sql_report.py +2 -0
  76. datahub/ingestion/source/state/checkpoint.py +2 -1
  77. datahub/ingestion/source/tableau/tableau.py +122 -45
  78. datahub/ingestion/source/tableau/tableau_common.py +18 -0
  79. datahub/ingestion/source/tableau/tableau_constant.py +3 -1
  80. datahub/ingestion/source/tableau/tableau_server_wrapper.py +6 -2
  81. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  82. datahub/ingestion/source/unity/proxy.py +8 -27
  83. datahub/ingestion/source/usage/usage_common.py +15 -1
  84. datahub/ingestion/source_report/ingestion_stage.py +3 -0
  85. datahub/metadata/_schema_classes.py +256 -3
  86. datahub/metadata/_urns/urn_defs.py +168 -168
  87. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
  88. datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py +2 -0
  89. datahub/metadata/schema.avsc +252 -33
  90. datahub/metadata/schemas/DataJobKey.avsc +2 -1
  91. datahub/metadata/schemas/DataProcessInstanceKey.avsc +5 -1
  92. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  93. datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
  94. datahub/metadata/schemas/MLModelGroupProperties.avsc +82 -0
  95. datahub/metadata/schemas/MLModelProperties.avsc +62 -2
  96. datahub/metadata/schemas/MLTrainingRunProperties.avsc +171 -0
  97. datahub/metadata/schemas/MetadataChangeEvent.avsc +94 -2
  98. datahub/specific/aspect_helpers/__init__.py +0 -0
  99. datahub/specific/aspect_helpers/custom_properties.py +79 -0
  100. datahub/specific/aspect_helpers/ownership.py +67 -0
  101. datahub/specific/aspect_helpers/structured_properties.py +72 -0
  102. datahub/specific/aspect_helpers/tags.py +42 -0
  103. datahub/specific/aspect_helpers/terms.py +43 -0
  104. datahub/specific/chart.py +28 -184
  105. datahub/specific/dashboard.py +31 -196
  106. datahub/specific/datajob.py +34 -189
  107. datahub/specific/dataproduct.py +24 -86
  108. datahub/specific/dataset.py +48 -133
  109. datahub/specific/form.py +12 -32
  110. datahub/specific/structured_property.py +9 -9
  111. datahub/sql_parsing/sql_parsing_aggregator.py +10 -9
  112. datahub/sql_parsing/sqlglot_lineage.py +15 -5
  113. datahub/sql_parsing/tool_meta_extractor.py +119 -5
  114. datahub/utilities/time.py +8 -3
  115. datahub/utilities/urns/_urn_base.py +5 -7
  116. datahub/ingestion/source/kafka/kafka_connect.py +0 -1468
  117. datahub/specific/custom_properties.py +0 -37
  118. datahub/specific/ownership.py +0 -48
  119. datahub/specific/structured_properties.py +0 -53
  120. {acryl_datahub-0.15.0rc25.dist-info → acryl_datahub-0.15.0.1.dist-info}/top_level.txt +0 -0
@@ -19,6 +19,8 @@ from .....schema_classes import CostCostClass
19
19
  from .....schema_classes import CostCostDiscriminatorClass
20
20
  from .....schema_classes import CostTypeClass
21
21
  from .....schema_classes import DataPlatformInstanceClass
22
+ from .....schema_classes import DataTransformClass
23
+ from .....schema_classes import DataTransformLogicClass
22
24
  from .....schema_classes import DeprecationClass
23
25
  from .....schema_classes import DocumentationClass
24
26
  from .....schema_classes import DocumentationAssociationClass
@@ -79,6 +81,8 @@ CostCost = CostCostClass
79
81
  CostCostDiscriminator = CostCostDiscriminatorClass
80
82
  CostType = CostTypeClass
81
83
  DataPlatformInstance = DataPlatformInstanceClass
84
+ DataTransform = DataTransformClass
85
+ DataTransformLogic = DataTransformLogicClass
82
86
  Deprecation = DeprecationClass
83
87
  Documentation = DocumentationClass
84
88
  DocumentationAssociation = DocumentationAssociationClass
@@ -30,6 +30,7 @@ from ......schema_classes import MLModelFactorsClass
30
30
  from ......schema_classes import MLModelGroupPropertiesClass
31
31
  from ......schema_classes import MLModelPropertiesClass
32
32
  from ......schema_classes import MLPrimaryKeyPropertiesClass
33
+ from ......schema_classes import MLTrainingRunPropertiesClass
33
34
  from ......schema_classes import MetricsClass
34
35
  from ......schema_classes import QuantitativeAnalysesClass
35
36
  from ......schema_classes import SourceCodeClass
@@ -61,6 +62,7 @@ MLModelFactors = MLModelFactorsClass
61
62
  MLModelGroupProperties = MLModelGroupPropertiesClass
62
63
  MLModelProperties = MLModelPropertiesClass
63
64
  MLPrimaryKeyProperties = MLPrimaryKeyPropertiesClass
65
+ MLTrainingRunProperties = MLTrainingRunPropertiesClass
64
66
  Metrics = MetricsClass
65
67
  QuantitativeAnalyses = QuantitativeAnalysesClass
66
68
  SourceCode = SourceCodeClass
@@ -400,6 +400,69 @@
400
400
  ],
401
401
  "doc": "Institutional memory of an entity. This is a way to link to relevant documentation and provide description of the documentation. Institutional or tribal knowledge is very important for users to leverage the entity."
402
402
  },
403
+ {
404
+ "type": "record",
405
+ "Aspect": {
406
+ "name": "dataTransformLogic"
407
+ },
408
+ "name": "DataTransformLogic",
409
+ "namespace": "com.linkedin.pegasus2avro.common",
410
+ "fields": [
411
+ {
412
+ "type": {
413
+ "type": "array",
414
+ "items": {
415
+ "type": "record",
416
+ "name": "DataTransform",
417
+ "namespace": "com.linkedin.pegasus2avro.common",
418
+ "fields": [
419
+ {
420
+ "type": [
421
+ "null",
422
+ {
423
+ "type": "record",
424
+ "name": "QueryStatement",
425
+ "namespace": "com.linkedin.pegasus2avro.query",
426
+ "fields": [
427
+ {
428
+ "type": "string",
429
+ "name": "value",
430
+ "doc": "The query text"
431
+ },
432
+ {
433
+ "type": {
434
+ "type": "enum",
435
+ "symbolDocs": {
436
+ "SQL": "A SQL Query"
437
+ },
438
+ "name": "QueryLanguage",
439
+ "namespace": "com.linkedin.pegasus2avro.query",
440
+ "symbols": [
441
+ "SQL"
442
+ ]
443
+ },
444
+ "name": "language",
445
+ "default": "SQL",
446
+ "doc": "The language of the Query, e.g. SQL."
447
+ }
448
+ ],
449
+ "doc": "A query statement against one or more data assets."
450
+ }
451
+ ],
452
+ "name": "queryStatement",
453
+ "default": null,
454
+ "doc": "The data transform may be defined by a query statement"
455
+ }
456
+ ],
457
+ "doc": "Information about a transformation. It may be a query,"
458
+ }
459
+ },
460
+ "name": "transforms",
461
+ "doc": "List of transformations applied"
462
+ }
463
+ ],
464
+ "doc": "Information about a Query against one or more data assets (e.g. Tables or Views)."
465
+ },
403
466
  {
404
467
  "type": "record",
405
468
  "Aspect": {
@@ -6827,6 +6890,21 @@
6827
6890
  "default": null,
6828
6891
  "doc": "URL where the reference exist"
6829
6892
  },
6893
+ {
6894
+ "Searchable": {
6895
+ "boostScore": 10.0,
6896
+ "enableAutocomplete": true,
6897
+ "fieldType": "WORD_GRAM",
6898
+ "queryByDefault": true
6899
+ },
6900
+ "type": [
6901
+ "null",
6902
+ "string"
6903
+ ],
6904
+ "name": "name",
6905
+ "default": null,
6906
+ "doc": "Display name of the MLModel"
6907
+ },
6830
6908
  {
6831
6909
  "Searchable": {
6832
6910
  "fieldType": "TEXT",
@@ -6841,6 +6919,7 @@
6841
6919
  "doc": "Documentation of the MLModel"
6842
6920
  },
6843
6921
  {
6922
+ "deprecated": true,
6844
6923
  "type": [
6845
6924
  "null",
6846
6925
  "long"
@@ -6849,6 +6928,24 @@
6849
6928
  "default": null,
6850
6929
  "doc": "Date when the MLModel was developed"
6851
6930
  },
6931
+ {
6932
+ "type": [
6933
+ "null",
6934
+ "com.linkedin.pegasus2avro.common.TimeStamp"
6935
+ ],
6936
+ "name": "created",
6937
+ "default": null,
6938
+ "doc": "Audit stamp containing who created this and when"
6939
+ },
6940
+ {
6941
+ "type": [
6942
+ "null",
6943
+ "com.linkedin.pegasus2avro.common.TimeStamp"
6944
+ ],
6945
+ "name": "lastModified",
6946
+ "default": null,
6947
+ "doc": "Date when the MLModel was last modified"
6948
+ },
6852
6949
  {
6853
6950
  "type": [
6854
6951
  "null",
@@ -7081,7 +7178,8 @@
7081
7178
  "Relationship": {
7082
7179
  "/*": {
7083
7180
  "entityTypes": [
7084
- "dataJob"
7181
+ "dataJob",
7182
+ "dataProcessInstance"
7085
7183
  ],
7086
7184
  "isLineage": true,
7087
7185
  "name": "TrainedBy"
@@ -7098,7 +7196,7 @@
7098
7196
  ],
7099
7197
  "name": "trainingJobs",
7100
7198
  "default": null,
7101
- "doc": "List of jobs (if any) used to train the model"
7199
+ "doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
7102
7200
  },
7103
7201
  {
7104
7202
  "Relationship": {
@@ -8415,6 +8513,21 @@
8415
8513
  "default": {},
8416
8514
  "doc": "Custom property bag."
8417
8515
  },
8516
+ {
8517
+ "Searchable": {
8518
+ "boostScore": 10.0,
8519
+ "enableAutocomplete": true,
8520
+ "fieldType": "WORD_GRAM",
8521
+ "queryByDefault": true
8522
+ },
8523
+ "type": [
8524
+ "null",
8525
+ "string"
8526
+ ],
8527
+ "name": "name",
8528
+ "default": null,
8529
+ "doc": "Display name of the MLModelGroup"
8530
+ },
8418
8531
  {
8419
8532
  "Searchable": {
8420
8533
  "fieldType": "TEXT",
@@ -8429,6 +8542,7 @@
8429
8542
  "doc": "Documentation of the MLModelGroup"
8430
8543
  },
8431
8544
  {
8545
+ "deprecated": true,
8432
8546
  "type": [
8433
8547
  "null",
8434
8548
  "long"
@@ -8437,6 +8551,47 @@
8437
8551
  "default": null,
8438
8552
  "doc": "Date when the MLModelGroup was developed"
8439
8553
  },
8554
+ {
8555
+ "type": [
8556
+ "null",
8557
+ "com.linkedin.pegasus2avro.common.TimeStamp"
8558
+ ],
8559
+ "name": "created",
8560
+ "default": null,
8561
+ "doc": "Time and Actor who created the MLModelGroup"
8562
+ },
8563
+ {
8564
+ "type": [
8565
+ "null",
8566
+ "com.linkedin.pegasus2avro.common.TimeStamp"
8567
+ ],
8568
+ "name": "lastModified",
8569
+ "default": null,
8570
+ "doc": "Date when the MLModelGroup was last modified"
8571
+ },
8572
+ {
8573
+ "Relationship": {
8574
+ "/*": {
8575
+ "entityTypes": [
8576
+ "dataJob"
8577
+ ],
8578
+ "isLineage": true,
8579
+ "name": "TrainedBy"
8580
+ }
8581
+ },
8582
+ "Urn": "Urn",
8583
+ "urn_is_array": true,
8584
+ "type": [
8585
+ "null",
8586
+ {
8587
+ "type": "array",
8588
+ "items": "string"
8589
+ }
8590
+ ],
8591
+ "name": "trainingJobs",
8592
+ "default": null,
8593
+ "doc": "List of jobs (if any) used to train the model group. Visible in Lineage."
8594
+ },
8440
8595
  {
8441
8596
  "type": [
8442
8597
  "null",
@@ -11855,35 +12010,7 @@
11855
12010
  "namespace": "com.linkedin.pegasus2avro.query",
11856
12011
  "fields": [
11857
12012
  {
11858
- "type": {
11859
- "type": "record",
11860
- "name": "QueryStatement",
11861
- "namespace": "com.linkedin.pegasus2avro.query",
11862
- "fields": [
11863
- {
11864
- "type": "string",
11865
- "name": "value",
11866
- "doc": "The query text"
11867
- },
11868
- {
11869
- "type": {
11870
- "type": "enum",
11871
- "symbolDocs": {
11872
- "SQL": "A SQL Query"
11873
- },
11874
- "name": "QueryLanguage",
11875
- "namespace": "com.linkedin.pegasus2avro.query",
11876
- "symbols": [
11877
- "SQL"
11878
- ]
11879
- },
11880
- "name": "language",
11881
- "default": "SQL",
11882
- "doc": "The language of the Query, e.g. SQL."
11883
- }
11884
- ],
11885
- "doc": "A query statement against one or more data assets."
11886
- },
12013
+ "type": "com.linkedin.pegasus2avro.query.QueryStatement",
11887
12014
  "name": "statement",
11888
12015
  "doc": "The Query Statement."
11889
12016
  },
@@ -12619,7 +12746,8 @@
12619
12746
  "Relationship": {
12620
12747
  "/*": {
12621
12748
  "entityTypes": [
12622
- "dataset"
12749
+ "dataset",
12750
+ "mlModel"
12623
12751
  ],
12624
12752
  "name": "Produces"
12625
12753
  }
@@ -12944,6 +13072,93 @@
12944
13072
  "doc": "Properties associated with a MLPrimaryKey editable from the UI"
12945
13073
  },
12946
13074
  "com.linkedin.pegasus2avro.ml.metadata.SourceCode",
13075
+ {
13076
+ "type": "record",
13077
+ "Aspect": {
13078
+ "name": "mlTrainingRunProperties"
13079
+ },
13080
+ "name": "MLTrainingRunProperties",
13081
+ "namespace": "com.linkedin.pegasus2avro.ml.metadata",
13082
+ "fields": [
13083
+ {
13084
+ "Searchable": {
13085
+ "/*": {
13086
+ "fieldType": "TEXT",
13087
+ "queryByDefault": true
13088
+ }
13089
+ },
13090
+ "type": {
13091
+ "type": "map",
13092
+ "values": "string"
13093
+ },
13094
+ "name": "customProperties",
13095
+ "default": {},
13096
+ "doc": "Custom property bag."
13097
+ },
13098
+ {
13099
+ "Searchable": {
13100
+ "fieldType": "KEYWORD"
13101
+ },
13102
+ "java": {
13103
+ "class": "com.linkedin.pegasus2avro.common.url.Url",
13104
+ "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer"
13105
+ },
13106
+ "type": [
13107
+ "null",
13108
+ "string"
13109
+ ],
13110
+ "name": "externalUrl",
13111
+ "default": null,
13112
+ "doc": "URL where the reference exist"
13113
+ },
13114
+ {
13115
+ "type": [
13116
+ "null",
13117
+ "string"
13118
+ ],
13119
+ "name": "id",
13120
+ "default": null,
13121
+ "doc": "Run Id of the ML Training Run"
13122
+ },
13123
+ {
13124
+ "type": [
13125
+ "null",
13126
+ {
13127
+ "type": "array",
13128
+ "items": "string"
13129
+ }
13130
+ ],
13131
+ "name": "outputUrls",
13132
+ "default": null,
13133
+ "doc": "List of URLs for the Outputs of the ML Training Run"
13134
+ },
13135
+ {
13136
+ "type": [
13137
+ "null",
13138
+ {
13139
+ "type": "array",
13140
+ "items": "com.linkedin.pegasus2avro.ml.metadata.MLHyperParam"
13141
+ }
13142
+ ],
13143
+ "name": "hyperParams",
13144
+ "default": null,
13145
+ "doc": "Hyperparameters of the ML Training Run"
13146
+ },
13147
+ {
13148
+ "type": [
13149
+ "null",
13150
+ {
13151
+ "type": "array",
13152
+ "items": "com.linkedin.pegasus2avro.ml.metadata.MLMetric"
13153
+ }
13154
+ ],
13155
+ "name": "trainingMetrics",
13156
+ "default": null,
13157
+ "doc": "Metrics of the ML Training Run"
13158
+ }
13159
+ ],
13160
+ "doc": "The inputs and outputs of this training run"
13161
+ },
12947
13162
  "com.linkedin.pegasus2avro.ml.metadata.EthicalConsiderations",
12948
13163
  "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties",
12949
13164
  "com.linkedin.pegasus2avro.ml.metadata.MLModelFactorPrompts",
@@ -15561,7 +15776,11 @@
15561
15776
  "dataProcessInstanceRelationships",
15562
15777
  "dataProcessInstanceRunEvent",
15563
15778
  "status",
15564
- "testResults"
15779
+ "testResults",
15780
+ "dataPlatformInstance",
15781
+ "subTypes",
15782
+ "container",
15783
+ "mlTrainingRunProperties"
15565
15784
  ],
15566
15785
  "entityDoc": "DataProcessInstance represents an instance of a datajob/jobflow run"
15567
15786
  },
@@ -25,7 +25,8 @@
25
25
  "forms",
26
26
  "subTypes",
27
27
  "incidentsSummary",
28
- "testResults"
28
+ "testResults",
29
+ "dataTransformLogic"
29
30
  ]
30
31
  },
31
32
  "name": "DataJobKey",
@@ -11,7 +11,11 @@
11
11
  "dataProcessInstanceRelationships",
12
12
  "dataProcessInstanceRunEvent",
13
13
  "status",
14
- "testResults"
14
+ "testResults",
15
+ "dataPlatformInstance",
16
+ "subTypes",
17
+ "container",
18
+ "mlTrainingRunProperties"
15
19
  ],
16
20
  "entityDoc": "DataProcessInstance represents an instance of a datajob/jobflow run"
17
21
  },
@@ -10,7 +10,8 @@
10
10
  "Relationship": {
11
11
  "/*": {
12
12
  "entityTypes": [
13
- "dataset"
13
+ "dataset",
14
+ "mlModel"
14
15
  ],
15
16
  "name": "Produces"
16
17
  }
@@ -0,0 +1,63 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "dataTransformLogic"
5
+ },
6
+ "name": "DataTransformLogic",
7
+ "namespace": "com.linkedin.pegasus2avro.common",
8
+ "fields": [
9
+ {
10
+ "type": {
11
+ "type": "array",
12
+ "items": {
13
+ "type": "record",
14
+ "name": "DataTransform",
15
+ "namespace": "com.linkedin.pegasus2avro.common",
16
+ "fields": [
17
+ {
18
+ "type": [
19
+ "null",
20
+ {
21
+ "type": "record",
22
+ "name": "QueryStatement",
23
+ "namespace": "com.linkedin.pegasus2avro.query",
24
+ "fields": [
25
+ {
26
+ "type": "string",
27
+ "name": "value",
28
+ "doc": "The query text"
29
+ },
30
+ {
31
+ "type": {
32
+ "type": "enum",
33
+ "symbolDocs": {
34
+ "SQL": "A SQL Query"
35
+ },
36
+ "name": "QueryLanguage",
37
+ "namespace": "com.linkedin.pegasus2avro.query",
38
+ "symbols": [
39
+ "SQL"
40
+ ]
41
+ },
42
+ "name": "language",
43
+ "default": "SQL",
44
+ "doc": "The language of the Query, e.g. SQL."
45
+ }
46
+ ],
47
+ "doc": "A query statement against one or more data assets."
48
+ }
49
+ ],
50
+ "name": "queryStatement",
51
+ "default": null,
52
+ "doc": "The data transform may be defined by a query statement"
53
+ }
54
+ ],
55
+ "doc": "Information about a transformation. It may be a query,"
56
+ }
57
+ },
58
+ "name": "transforms",
59
+ "doc": "List of transformations applied"
60
+ }
61
+ ],
62
+ "doc": "Information about a Query against one or more data assets (e.g. Tables or Views)."
63
+ }
@@ -21,6 +21,21 @@
21
21
  "default": {},
22
22
  "doc": "Custom property bag."
23
23
  },
24
+ {
25
+ "Searchable": {
26
+ "boostScore": 10.0,
27
+ "enableAutocomplete": true,
28
+ "fieldType": "WORD_GRAM",
29
+ "queryByDefault": true
30
+ },
31
+ "type": [
32
+ "null",
33
+ "string"
34
+ ],
35
+ "name": "name",
36
+ "default": null,
37
+ "doc": "Display name of the MLModelGroup"
38
+ },
24
39
  {
25
40
  "Searchable": {
26
41
  "fieldType": "TEXT",
@@ -35,6 +50,7 @@
35
50
  "doc": "Documentation of the MLModelGroup"
36
51
  },
37
52
  {
53
+ "deprecated": true,
38
54
  "type": [
39
55
  "null",
40
56
  "long"
@@ -43,6 +59,72 @@
43
59
  "default": null,
44
60
  "doc": "Date when the MLModelGroup was developed"
45
61
  },
62
+ {
63
+ "type": [
64
+ "null",
65
+ {
66
+ "type": "record",
67
+ "name": "TimeStamp",
68
+ "namespace": "com.linkedin.pegasus2avro.common",
69
+ "fields": [
70
+ {
71
+ "type": "long",
72
+ "name": "time",
73
+ "doc": "When did the event occur"
74
+ },
75
+ {
76
+ "java": {
77
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
78
+ },
79
+ "type": [
80
+ "null",
81
+ "string"
82
+ ],
83
+ "name": "actor",
84
+ "default": null,
85
+ "doc": "Optional: The actor urn involved in the event.",
86
+ "Urn": "Urn"
87
+ }
88
+ ],
89
+ "doc": "A standard event timestamp"
90
+ }
91
+ ],
92
+ "name": "created",
93
+ "default": null,
94
+ "doc": "Time and Actor who created the MLModelGroup"
95
+ },
96
+ {
97
+ "type": [
98
+ "null",
99
+ "com.linkedin.pegasus2avro.common.TimeStamp"
100
+ ],
101
+ "name": "lastModified",
102
+ "default": null,
103
+ "doc": "Date when the MLModelGroup was last modified"
104
+ },
105
+ {
106
+ "Relationship": {
107
+ "/*": {
108
+ "entityTypes": [
109
+ "dataJob"
110
+ ],
111
+ "isLineage": true,
112
+ "name": "TrainedBy"
113
+ }
114
+ },
115
+ "type": [
116
+ "null",
117
+ {
118
+ "type": "array",
119
+ "items": "string"
120
+ }
121
+ ],
122
+ "name": "trainingJobs",
123
+ "default": null,
124
+ "doc": "List of jobs (if any) used to train the model group. Visible in Lineage.",
125
+ "Urn": "Urn",
126
+ "urn_is_array": true
127
+ },
46
128
  {
47
129
  "type": [
48
130
  "null",