acryl-datahub 0.15.0.1rc6__py3-none-any.whl → 0.15.0.1rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (27) hide show
  1. {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/METADATA +2544 -2544
  2. {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/RECORD +27 -26
  3. datahub/__init__.py +1 -1
  4. datahub/ingestion/source/looker/looker_common.py +9 -0
  5. datahub/ingestion/source/looker/looker_source.py +19 -3
  6. datahub/ingestion/source/looker/looker_usage.py +23 -17
  7. datahub/ingestion/source/mode.py +14 -7
  8. datahub/ingestion/source/snowflake/snowflake_config.py +3 -25
  9. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -10
  10. datahub/ingestion/source/snowflake/snowflake_query.py +0 -9
  11. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +1 -5
  12. datahub/ingestion/source/snowflake/snowflake_shares.py +1 -1
  13. datahub/ingestion/source/snowflake/snowflake_v2.py +14 -6
  14. datahub/ingestion/source/tableau/tableau.py +51 -20
  15. datahub/ingestion/source_report/ingestion_stage.py +1 -0
  16. datahub/metadata/_schema_classes.py +195 -2
  17. datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py +2 -0
  18. datahub/metadata/schema.avsc +188 -4
  19. datahub/metadata/schemas/DataProcessInstanceKey.avsc +5 -1
  20. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  21. datahub/metadata/schemas/MLModelGroupProperties.avsc +82 -0
  22. datahub/metadata/schemas/MLModelProperties.avsc +62 -2
  23. datahub/metadata/schemas/MLTrainingRunProperties.avsc +171 -0
  24. datahub/metadata/schemas/MetadataChangeEvent.avsc +94 -2
  25. {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/WHEEL +0 -0
  26. {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/entry_points.txt +0 -0
  27. {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/top_level.txt +0 -0
@@ -6827,6 +6827,21 @@
6827
6827
  "default": null,
6828
6828
  "doc": "URL where the reference exist"
6829
6829
  },
6830
+ {
6831
+ "Searchable": {
6832
+ "boostScore": 10.0,
6833
+ "enableAutocomplete": true,
6834
+ "fieldType": "WORD_GRAM",
6835
+ "queryByDefault": true
6836
+ },
6837
+ "type": [
6838
+ "null",
6839
+ "string"
6840
+ ],
6841
+ "name": "name",
6842
+ "default": null,
6843
+ "doc": "Display name of the MLModel"
6844
+ },
6830
6845
  {
6831
6846
  "Searchable": {
6832
6847
  "fieldType": "TEXT",
@@ -6841,6 +6856,7 @@
6841
6856
  "doc": "Documentation of the MLModel"
6842
6857
  },
6843
6858
  {
6859
+ "deprecated": true,
6844
6860
  "type": [
6845
6861
  "null",
6846
6862
  "long"
@@ -6849,6 +6865,24 @@
6849
6865
  "default": null,
6850
6866
  "doc": "Date when the MLModel was developed"
6851
6867
  },
6868
+ {
6869
+ "type": [
6870
+ "null",
6871
+ "com.linkedin.pegasus2avro.common.TimeStamp"
6872
+ ],
6873
+ "name": "created",
6874
+ "default": null,
6875
+ "doc": "Audit stamp containing who created this and when"
6876
+ },
6877
+ {
6878
+ "type": [
6879
+ "null",
6880
+ "com.linkedin.pegasus2avro.common.TimeStamp"
6881
+ ],
6882
+ "name": "lastModified",
6883
+ "default": null,
6884
+ "doc": "Date when the MLModel was last modified"
6885
+ },
6852
6886
  {
6853
6887
  "type": [
6854
6888
  "null",
@@ -7081,7 +7115,8 @@
7081
7115
  "Relationship": {
7082
7116
  "/*": {
7083
7117
  "entityTypes": [
7084
- "dataJob"
7118
+ "dataJob",
7119
+ "dataProcessInstance"
7085
7120
  ],
7086
7121
  "isLineage": true,
7087
7122
  "name": "TrainedBy"
@@ -7098,7 +7133,7 @@
7098
7133
  ],
7099
7134
  "name": "trainingJobs",
7100
7135
  "default": null,
7101
- "doc": "List of jobs (if any) used to train the model"
7136
+ "doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
7102
7137
  },
7103
7138
  {
7104
7139
  "Relationship": {
@@ -8415,6 +8450,21 @@
8415
8450
  "default": {},
8416
8451
  "doc": "Custom property bag."
8417
8452
  },
8453
+ {
8454
+ "Searchable": {
8455
+ "boostScore": 10.0,
8456
+ "enableAutocomplete": true,
8457
+ "fieldType": "WORD_GRAM",
8458
+ "queryByDefault": true
8459
+ },
8460
+ "type": [
8461
+ "null",
8462
+ "string"
8463
+ ],
8464
+ "name": "name",
8465
+ "default": null,
8466
+ "doc": "Display name of the MLModelGroup"
8467
+ },
8418
8468
  {
8419
8469
  "Searchable": {
8420
8470
  "fieldType": "TEXT",
@@ -8429,6 +8479,7 @@
8429
8479
  "doc": "Documentation of the MLModelGroup"
8430
8480
  },
8431
8481
  {
8482
+ "deprecated": true,
8432
8483
  "type": [
8433
8484
  "null",
8434
8485
  "long"
@@ -8437,6 +8488,47 @@
8437
8488
  "default": null,
8438
8489
  "doc": "Date when the MLModelGroup was developed"
8439
8490
  },
8491
+ {
8492
+ "type": [
8493
+ "null",
8494
+ "com.linkedin.pegasus2avro.common.TimeStamp"
8495
+ ],
8496
+ "name": "created",
8497
+ "default": null,
8498
+ "doc": "Time and Actor who created the MLModelGroup"
8499
+ },
8500
+ {
8501
+ "type": [
8502
+ "null",
8503
+ "com.linkedin.pegasus2avro.common.TimeStamp"
8504
+ ],
8505
+ "name": "lastModified",
8506
+ "default": null,
8507
+ "doc": "Date when the MLModelGroup was last modified"
8508
+ },
8509
+ {
8510
+ "Relationship": {
8511
+ "/*": {
8512
+ "entityTypes": [
8513
+ "dataJob"
8514
+ ],
8515
+ "isLineage": true,
8516
+ "name": "TrainedBy"
8517
+ }
8518
+ },
8519
+ "Urn": "Urn",
8520
+ "urn_is_array": true,
8521
+ "type": [
8522
+ "null",
8523
+ {
8524
+ "type": "array",
8525
+ "items": "string"
8526
+ }
8527
+ ],
8528
+ "name": "trainingJobs",
8529
+ "default": null,
8530
+ "doc": "List of jobs (if any) used to train the model group. Visible in Lineage."
8531
+ },
8440
8532
  {
8441
8533
  "type": [
8442
8534
  "null",
@@ -12619,7 +12711,8 @@
12619
12711
  "Relationship": {
12620
12712
  "/*": {
12621
12713
  "entityTypes": [
12622
- "dataset"
12714
+ "dataset",
12715
+ "mlModel"
12623
12716
  ],
12624
12717
  "name": "Produces"
12625
12718
  }
@@ -12944,6 +13037,93 @@
12944
13037
  "doc": "Properties associated with a MLPrimaryKey editable from the UI"
12945
13038
  },
12946
13039
  "com.linkedin.pegasus2avro.ml.metadata.SourceCode",
13040
+ {
13041
+ "type": "record",
13042
+ "Aspect": {
13043
+ "name": "mlTrainingRunProperties"
13044
+ },
13045
+ "name": "MLTrainingRunProperties",
13046
+ "namespace": "com.linkedin.pegasus2avro.ml.metadata",
13047
+ "fields": [
13048
+ {
13049
+ "Searchable": {
13050
+ "/*": {
13051
+ "fieldType": "TEXT",
13052
+ "queryByDefault": true
13053
+ }
13054
+ },
13055
+ "type": {
13056
+ "type": "map",
13057
+ "values": "string"
13058
+ },
13059
+ "name": "customProperties",
13060
+ "default": {},
13061
+ "doc": "Custom property bag."
13062
+ },
13063
+ {
13064
+ "Searchable": {
13065
+ "fieldType": "KEYWORD"
13066
+ },
13067
+ "java": {
13068
+ "class": "com.linkedin.pegasus2avro.common.url.Url",
13069
+ "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer"
13070
+ },
13071
+ "type": [
13072
+ "null",
13073
+ "string"
13074
+ ],
13075
+ "name": "externalUrl",
13076
+ "default": null,
13077
+ "doc": "URL where the reference exist"
13078
+ },
13079
+ {
13080
+ "type": [
13081
+ "null",
13082
+ "string"
13083
+ ],
13084
+ "name": "id",
13085
+ "default": null,
13086
+ "doc": "Run Id of the ML Training Run"
13087
+ },
13088
+ {
13089
+ "type": [
13090
+ "null",
13091
+ {
13092
+ "type": "array",
13093
+ "items": "string"
13094
+ }
13095
+ ],
13096
+ "name": "outputUrls",
13097
+ "default": null,
13098
+ "doc": "List of URLs for the Outputs of the ML Training Run"
13099
+ },
13100
+ {
13101
+ "type": [
13102
+ "null",
13103
+ {
13104
+ "type": "array",
13105
+ "items": "com.linkedin.pegasus2avro.ml.metadata.MLHyperParam"
13106
+ }
13107
+ ],
13108
+ "name": "hyperParams",
13109
+ "default": null,
13110
+ "doc": "Hyperparameters of the ML Training Run"
13111
+ },
13112
+ {
13113
+ "type": [
13114
+ "null",
13115
+ {
13116
+ "type": "array",
13117
+ "items": "com.linkedin.pegasus2avro.ml.metadata.MLMetric"
13118
+ }
13119
+ ],
13120
+ "name": "trainingMetrics",
13121
+ "default": null,
13122
+ "doc": "Metrics of the ML Training Run"
13123
+ }
13124
+ ],
13125
+ "doc": "The inputs and outputs of this training run"
13126
+ },
12947
13127
  "com.linkedin.pegasus2avro.ml.metadata.EthicalConsiderations",
12948
13128
  "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties",
12949
13129
  "com.linkedin.pegasus2avro.ml.metadata.MLModelFactorPrompts",
@@ -15561,7 +15741,11 @@
15561
15741
  "dataProcessInstanceRelationships",
15562
15742
  "dataProcessInstanceRunEvent",
15563
15743
  "status",
15564
- "testResults"
15744
+ "testResults",
15745
+ "dataPlatformInstance",
15746
+ "subTypes",
15747
+ "container",
15748
+ "mlTrainingRunProperties"
15565
15749
  ],
15566
15750
  "entityDoc": "DataProcessInstance represents an instance of a datajob/jobflow run"
15567
15751
  },
@@ -11,7 +11,11 @@
11
11
  "dataProcessInstanceRelationships",
12
12
  "dataProcessInstanceRunEvent",
13
13
  "status",
14
- "testResults"
14
+ "testResults",
15
+ "dataPlatformInstance",
16
+ "subTypes",
17
+ "container",
18
+ "mlTrainingRunProperties"
15
19
  ],
16
20
  "entityDoc": "DataProcessInstance represents an instance of a datajob/jobflow run"
17
21
  },
@@ -10,7 +10,8 @@
10
10
  "Relationship": {
11
11
  "/*": {
12
12
  "entityTypes": [
13
- "dataset"
13
+ "dataset",
14
+ "mlModel"
14
15
  ],
15
16
  "name": "Produces"
16
17
  }
@@ -21,6 +21,21 @@
21
21
  "default": {},
22
22
  "doc": "Custom property bag."
23
23
  },
24
+ {
25
+ "Searchable": {
26
+ "boostScore": 10.0,
27
+ "enableAutocomplete": true,
28
+ "fieldType": "WORD_GRAM",
29
+ "queryByDefault": true
30
+ },
31
+ "type": [
32
+ "null",
33
+ "string"
34
+ ],
35
+ "name": "name",
36
+ "default": null,
37
+ "doc": "Display name of the MLModelGroup"
38
+ },
24
39
  {
25
40
  "Searchable": {
26
41
  "fieldType": "TEXT",
@@ -35,6 +50,7 @@
35
50
  "doc": "Documentation of the MLModelGroup"
36
51
  },
37
52
  {
53
+ "deprecated": true,
38
54
  "type": [
39
55
  "null",
40
56
  "long"
@@ -43,6 +59,72 @@
43
59
  "default": null,
44
60
  "doc": "Date when the MLModelGroup was developed"
45
61
  },
62
+ {
63
+ "type": [
64
+ "null",
65
+ {
66
+ "type": "record",
67
+ "name": "TimeStamp",
68
+ "namespace": "com.linkedin.pegasus2avro.common",
69
+ "fields": [
70
+ {
71
+ "type": "long",
72
+ "name": "time",
73
+ "doc": "When did the event occur"
74
+ },
75
+ {
76
+ "java": {
77
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
78
+ },
79
+ "type": [
80
+ "null",
81
+ "string"
82
+ ],
83
+ "name": "actor",
84
+ "default": null,
85
+ "doc": "Optional: The actor urn involved in the event.",
86
+ "Urn": "Urn"
87
+ }
88
+ ],
89
+ "doc": "A standard event timestamp"
90
+ }
91
+ ],
92
+ "name": "created",
93
+ "default": null,
94
+ "doc": "Time and Actor who created the MLModelGroup"
95
+ },
96
+ {
97
+ "type": [
98
+ "null",
99
+ "com.linkedin.pegasus2avro.common.TimeStamp"
100
+ ],
101
+ "name": "lastModified",
102
+ "default": null,
103
+ "doc": "Date when the MLModelGroup was last modified"
104
+ },
105
+ {
106
+ "Relationship": {
107
+ "/*": {
108
+ "entityTypes": [
109
+ "dataJob"
110
+ ],
111
+ "isLineage": true,
112
+ "name": "TrainedBy"
113
+ }
114
+ },
115
+ "type": [
116
+ "null",
117
+ {
118
+ "type": "array",
119
+ "items": "string"
120
+ }
121
+ ],
122
+ "name": "trainingJobs",
123
+ "default": null,
124
+ "doc": "List of jobs (if any) used to train the model group. Visible in Lineage.",
125
+ "Urn": "Urn",
126
+ "urn_is_array": true
127
+ },
46
128
  {
47
129
  "type": [
48
130
  "null",
@@ -37,6 +37,21 @@
37
37
  "default": null,
38
38
  "doc": "URL where the reference exist"
39
39
  },
40
+ {
41
+ "Searchable": {
42
+ "boostScore": 10.0,
43
+ "enableAutocomplete": true,
44
+ "fieldType": "WORD_GRAM",
45
+ "queryByDefault": true
46
+ },
47
+ "type": [
48
+ "null",
49
+ "string"
50
+ ],
51
+ "name": "name",
52
+ "default": null,
53
+ "doc": "Display name of the MLModel"
54
+ },
40
55
  {
41
56
  "Searchable": {
42
57
  "fieldType": "TEXT",
@@ -51,6 +66,7 @@
51
66
  "doc": "Documentation of the MLModel"
52
67
  },
53
68
  {
69
+ "deprecated": true,
54
70
  "type": [
55
71
  "null",
56
72
  "long"
@@ -59,6 +75,49 @@
59
75
  "default": null,
60
76
  "doc": "Date when the MLModel was developed"
61
77
  },
78
+ {
79
+ "type": [
80
+ "null",
81
+ {
82
+ "type": "record",
83
+ "name": "TimeStamp",
84
+ "namespace": "com.linkedin.pegasus2avro.common",
85
+ "fields": [
86
+ {
87
+ "type": "long",
88
+ "name": "time",
89
+ "doc": "When did the event occur"
90
+ },
91
+ {
92
+ "java": {
93
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
94
+ },
95
+ "type": [
96
+ "null",
97
+ "string"
98
+ ],
99
+ "name": "actor",
100
+ "default": null,
101
+ "doc": "Optional: The actor urn involved in the event.",
102
+ "Urn": "Urn"
103
+ }
104
+ ],
105
+ "doc": "A standard event timestamp"
106
+ }
107
+ ],
108
+ "name": "created",
109
+ "default": null,
110
+ "doc": "Audit stamp containing who created this and when"
111
+ },
112
+ {
113
+ "type": [
114
+ "null",
115
+ "com.linkedin.pegasus2avro.common.TimeStamp"
116
+ ],
117
+ "name": "lastModified",
118
+ "default": null,
119
+ "doc": "Date when the MLModel was last modified"
120
+ },
62
121
  {
63
122
  "type": [
64
123
  "null",
@@ -291,7 +350,8 @@
291
350
  "Relationship": {
292
351
  "/*": {
293
352
  "entityTypes": [
294
- "dataJob"
353
+ "dataJob",
354
+ "dataProcessInstance"
295
355
  ],
296
356
  "isLineage": true,
297
357
  "name": "TrainedBy"
@@ -306,7 +366,7 @@
306
366
  ],
307
367
  "name": "trainingJobs",
308
368
  "default": null,
309
- "doc": "List of jobs (if any) used to train the model",
369
+ "doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.",
310
370
  "Urn": "Urn",
311
371
  "urn_is_array": true
312
372
  },
@@ -0,0 +1,171 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "mlTrainingRunProperties"
5
+ },
6
+ "name": "MLTrainingRunProperties",
7
+ "namespace": "com.linkedin.pegasus2avro.ml.metadata",
8
+ "fields": [
9
+ {
10
+ "Searchable": {
11
+ "/*": {
12
+ "fieldType": "TEXT",
13
+ "queryByDefault": true
14
+ }
15
+ },
16
+ "type": {
17
+ "type": "map",
18
+ "values": "string"
19
+ },
20
+ "name": "customProperties",
21
+ "default": {},
22
+ "doc": "Custom property bag."
23
+ },
24
+ {
25
+ "Searchable": {
26
+ "fieldType": "KEYWORD"
27
+ },
28
+ "java": {
29
+ "class": "com.linkedin.pegasus2avro.common.url.Url",
30
+ "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer"
31
+ },
32
+ "type": [
33
+ "null",
34
+ "string"
35
+ ],
36
+ "name": "externalUrl",
37
+ "default": null,
38
+ "doc": "URL where the reference exist"
39
+ },
40
+ {
41
+ "type": [
42
+ "null",
43
+ "string"
44
+ ],
45
+ "name": "id",
46
+ "default": null,
47
+ "doc": "Run Id of the ML Training Run"
48
+ },
49
+ {
50
+ "type": [
51
+ "null",
52
+ {
53
+ "type": "array",
54
+ "items": "string"
55
+ }
56
+ ],
57
+ "name": "outputUrls",
58
+ "default": null,
59
+ "doc": "List of URLs for the Outputs of the ML Training Run"
60
+ },
61
+ {
62
+ "type": [
63
+ "null",
64
+ {
65
+ "type": "array",
66
+ "items": {
67
+ "type": "record",
68
+ "Aspect": {
69
+ "name": "mlHyperParam"
70
+ },
71
+ "name": "MLHyperParam",
72
+ "namespace": "com.linkedin.pegasus2avro.ml.metadata",
73
+ "fields": [
74
+ {
75
+ "type": "string",
76
+ "name": "name",
77
+ "doc": "Name of the MLHyperParam"
78
+ },
79
+ {
80
+ "type": [
81
+ "null",
82
+ "string"
83
+ ],
84
+ "name": "description",
85
+ "default": null,
86
+ "doc": "Documentation of the MLHyperParam"
87
+ },
88
+ {
89
+ "type": [
90
+ "null",
91
+ "string"
92
+ ],
93
+ "name": "value",
94
+ "default": null,
95
+ "doc": "The value of the MLHyperParam"
96
+ },
97
+ {
98
+ "type": [
99
+ "null",
100
+ "long"
101
+ ],
102
+ "name": "createdAt",
103
+ "default": null,
104
+ "doc": "Date when the MLHyperParam was developed"
105
+ }
106
+ ],
107
+ "doc": "Properties associated with an ML Hyper Param"
108
+ }
109
+ }
110
+ ],
111
+ "name": "hyperParams",
112
+ "default": null,
113
+ "doc": "Hyperparameters of the ML Training Run"
114
+ },
115
+ {
116
+ "type": [
117
+ "null",
118
+ {
119
+ "type": "array",
120
+ "items": {
121
+ "type": "record",
122
+ "Aspect": {
123
+ "name": "mlMetric"
124
+ },
125
+ "name": "MLMetric",
126
+ "namespace": "com.linkedin.pegasus2avro.ml.metadata",
127
+ "fields": [
128
+ {
129
+ "type": "string",
130
+ "name": "name",
131
+ "doc": "Name of the mlMetric"
132
+ },
133
+ {
134
+ "type": [
135
+ "null",
136
+ "string"
137
+ ],
138
+ "name": "description",
139
+ "default": null,
140
+ "doc": "Documentation of the mlMetric"
141
+ },
142
+ {
143
+ "type": [
144
+ "null",
145
+ "string"
146
+ ],
147
+ "name": "value",
148
+ "default": null,
149
+ "doc": "The value of the mlMetric"
150
+ },
151
+ {
152
+ "type": [
153
+ "null",
154
+ "long"
155
+ ],
156
+ "name": "createdAt",
157
+ "default": null,
158
+ "doc": "Date when the mlMetric was developed"
159
+ }
160
+ ],
161
+ "doc": "Properties associated with an ML Metric"
162
+ }
163
+ }
164
+ ],
165
+ "name": "trainingMetrics",
166
+ "default": null,
167
+ "doc": "Metrics of the ML Training Run"
168
+ }
169
+ ],
170
+ "doc": "The inputs and outputs of this training run"
171
+ }