acryl-datahub-cloud 0.3.7.9rc1__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (66)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/acryl_cs_issues/source.py +0 -1
  3. acryl_datahub_cloud/api/__init__.py +1 -0
  4. acryl_datahub_cloud/api/client.py +6 -0
  5. acryl_datahub_cloud/api/entity_versioning.py +167 -0
  6. acryl_datahub_cloud/datahub_metadata_sharing/__init__.py +0 -0
  7. acryl_datahub_cloud/datahub_metadata_sharing/metadata_sharing_source.py +267 -0
  8. acryl_datahub_cloud/datahub_metadata_sharing/query.py +7 -0
  9. acryl_datahub_cloud/datahub_metadata_sharing/scroll_shared_entities.gql +204 -0
  10. acryl_datahub_cloud/datahub_metadata_sharing/share_entity.gql +9 -0
  11. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +0 -2
  12. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +0 -1
  13. acryl_datahub_cloud/datahub_reporting/extract_graph.py +0 -1
  14. acryl_datahub_cloud/datahub_reporting/extract_sql.py +0 -1
  15. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +79 -57
  16. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +284 -258
  17. acryl_datahub_cloud/lineage_features/source.py +22 -5
  18. acryl_datahub_cloud/metadata/_urns/urn_defs.py +1564 -1465
  19. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
  20. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executor/__init__.py +15 -0
  21. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  22. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py +2 -0
  23. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
  24. acryl_datahub_cloud/metadata/schema.avsc +23777 -22729
  25. acryl_datahub_cloud/metadata/schema_classes.py +1322 -519
  26. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +1 -1
  27. acryl_datahub_cloud/metadata/schemas/AssertionInferenceDetails.avsc +1 -1
  28. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +1 -1
  29. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +1 -1
  30. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  31. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  32. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceInfo.avsc +6 -0
  33. acryl_datahub_cloud/metadata/schemas/DataHubViewInfo.avsc +2 -0
  34. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +3 -1
  35. acryl_datahub_cloud/metadata/schemas/DataProcessInstanceKey.avsc +4 -0
  36. acryl_datahub_cloud/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  37. acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +63 -0
  38. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +2 -1
  39. acryl_datahub_cloud/metadata/schemas/Deprecation.avsc +12 -0
  40. acryl_datahub_cloud/metadata/schemas/DynamicFormAssignment.avsc +2 -0
  41. acryl_datahub_cloud/metadata/schemas/EntityTypeKey.avsc +1 -0
  42. acryl_datahub_cloud/metadata/schemas/ExecutionRequestInput.avsc +9 -0
  43. acryl_datahub_cloud/metadata/schemas/ExecutionRequestResult.avsc +14 -0
  44. acryl_datahub_cloud/metadata/schemas/Filter.avsc +2 -0
  45. acryl_datahub_cloud/metadata/schemas/MLFeatureProperties.avsc +51 -0
  46. acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
  47. acryl_datahub_cloud/metadata/schemas/MLModelGroupProperties.avsc +155 -0
  48. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +2 -1
  49. acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +155 -47
  50. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
  51. acryl_datahub_cloud/metadata/schemas/MLTrainingRunProperties.avsc +171 -0
  52. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +178 -47
  53. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +10 -1
  54. acryl_datahub_cloud/metadata/schemas/PostInfo.avsc +23 -0
  55. acryl_datahub_cloud/metadata/schemas/RecommendationModule.avsc +2 -0
  56. acryl_datahub_cloud/metadata/schemas/RemoteExecutorKey.avsc +21 -0
  57. acryl_datahub_cloud/metadata/schemas/RemoteExecutorStatus.avsc +80 -0
  58. acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +2 -1
  59. acryl_datahub_cloud/metadata/schemas/VersionProperties.avsc +216 -0
  60. acryl_datahub_cloud/metadata/schemas/VersionSetKey.avsc +26 -0
  61. acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc +49 -0
  62. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/METADATA +57 -47
  63. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/RECORD +66 -49
  64. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/WHEEL +1 -1
  65. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/entry_points.txt +1 -0
  66. {acryl_datahub_cloud-0.3.7.9rc1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/top_level.txt +0 -0
@@ -5769,6 +5769,66 @@
5769
5769
  "default": null,
5770
5770
  "doc": "URL where the reference exist"
5771
5771
  },
5772
+ {
5773
+ "Relationship": {
5774
+ "/*": {
5775
+ "entityTypes": [
5776
+ "dataJob",
5777
+ "dataProcessInstance"
5778
+ ],
5779
+ "isLineage": true,
5780
+ "name": "TrainedBy"
5781
+ }
5782
+ },
5783
+ "type": [
5784
+ "null",
5785
+ {
5786
+ "type": "array",
5787
+ "items": "string"
5788
+ }
5789
+ ],
5790
+ "name": "trainingJobs",
5791
+ "default": null,
5792
+ "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
5793
+ },
5794
+ {
5795
+ "Relationship": {
5796
+ "/*": {
5797
+ "entityTypes": [
5798
+ "dataJob",
5799
+ "dataProcessInstance"
5800
+ ],
5801
+ "isLineage": true,
5802
+ "isUpstream": false,
5803
+ "name": "UsedBy"
5804
+ }
5805
+ },
5806
+ "type": [
5807
+ "null",
5808
+ {
5809
+ "type": "array",
5810
+ "items": "string"
5811
+ }
5812
+ ],
5813
+ "name": "downstreamJobs",
5814
+ "default": null,
5815
+ "doc": "List of jobs or process instances (if any) that use the model or group."
5816
+ },
5817
+ {
5818
+ "Searchable": {
5819
+ "boostScore": 10.0,
5820
+ "enableAutocomplete": true,
5821
+ "fieldType": "WORD_GRAM",
5822
+ "queryByDefault": true
5823
+ },
5824
+ "type": [
5825
+ "null",
5826
+ "string"
5827
+ ],
5828
+ "name": "name",
5829
+ "default": null,
5830
+ "doc": "Display name of the MLModel"
5831
+ },
5772
5832
  {
5773
5833
  "Searchable": {
5774
5834
  "fieldType": "TEXT",
@@ -5783,6 +5843,7 @@
5783
5843
  "doc": "Documentation of the MLModel"
5784
5844
  },
5785
5845
  {
5846
+ "deprecated": true,
5786
5847
  "type": [
5787
5848
  "null",
5788
5849
  "long"
@@ -5791,6 +5852,24 @@
5791
5852
  "default": null,
5792
5853
  "doc": "Date when the MLModel was developed"
5793
5854
  },
5855
+ {
5856
+ "type": [
5857
+ "null",
5858
+ "com.linkedin.pegasus2avro.common.TimeStamp"
5859
+ ],
5860
+ "name": "created",
5861
+ "default": null,
5862
+ "doc": "Audit stamp containing who created this and when"
5863
+ },
5864
+ {
5865
+ "type": [
5866
+ "null",
5867
+ "com.linkedin.pegasus2avro.common.TimeStamp"
5868
+ ],
5869
+ "name": "lastModified",
5870
+ "default": null,
5871
+ "doc": "Date when the MLModel was last modified"
5872
+ },
5794
5873
  {
5795
5874
  "type": [
5796
5875
  "null",
@@ -5806,6 +5885,14 @@
5806
5885
  ],
5807
5886
  "name": "versionTag",
5808
5887
  "default": null
5888
+ },
5889
+ {
5890
+ "type": [
5891
+ "null",
5892
+ "com.linkedin.pegasus2avro.common.MetadataAttribution"
5893
+ ],
5894
+ "name": "metadataAttribution",
5895
+ "default": null
5809
5896
  }
5810
5897
  ],
5811
5898
  "doc": "A resource-defined string representing the resource state for the purpose of concurrency control"
@@ -6019,53 +6106,6 @@
6019
6106
  "Urn": "Urn",
6020
6107
  "urn_is_array": true
6021
6108
  },
6022
- {
6023
- "Relationship": {
6024
- "/*": {
6025
- "entityTypes": [
6026
- "dataJob"
6027
- ],
6028
- "isLineage": true,
6029
- "name": "TrainedBy"
6030
- }
6031
- },
6032
- "type": [
6033
- "null",
6034
- {
6035
- "type": "array",
6036
- "items": "string"
6037
- }
6038
- ],
6039
- "name": "trainingJobs",
6040
- "default": null,
6041
- "doc": "List of jobs (if any) used to train the model",
6042
- "Urn": "Urn",
6043
- "urn_is_array": true
6044
- },
6045
- {
6046
- "Relationship": {
6047
- "/*": {
6048
- "entityTypes": [
6049
- "dataJob"
6050
- ],
6051
- "isLineage": true,
6052
- "isUpstream": false,
6053
- "name": "UsedBy"
6054
- }
6055
- },
6056
- "type": [
6057
- "null",
6058
- {
6059
- "type": "array",
6060
- "items": "string"
6061
- }
6062
- ],
6063
- "name": "downstreamJobs",
6064
- "default": null,
6065
- "doc": "List of jobs (if any) that use the model",
6066
- "Urn": "Urn",
6067
- "urn_is_array": true
6068
- },
6069
6109
  {
6070
6110
  "Relationship": {
6071
6111
  "/*": {
@@ -6671,6 +6711,18 @@
6671
6711
  "name": "actor",
6672
6712
  "doc": "The user URN which will be credited for modifying this deprecation content.",
6673
6713
  "Urn": "Urn"
6714
+ },
6715
+ {
6716
+ "java": {
6717
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
6718
+ },
6719
+ "type": [
6720
+ "null",
6721
+ "string"
6722
+ ],
6723
+ "name": "replacement",
6724
+ "default": null,
6725
+ "Urn": "Urn"
6674
6726
  }
6675
6727
  ],
6676
6728
  "doc": "Deprecation status of an entity"
@@ -7468,6 +7520,66 @@
7468
7520
  "default": {},
7469
7521
  "doc": "Custom property bag."
7470
7522
  },
7523
+ {
7524
+ "Relationship": {
7525
+ "/*": {
7526
+ "entityTypes": [
7527
+ "dataJob",
7528
+ "dataProcessInstance"
7529
+ ],
7530
+ "isLineage": true,
7531
+ "name": "TrainedBy"
7532
+ }
7533
+ },
7534
+ "type": [
7535
+ "null",
7536
+ {
7537
+ "type": "array",
7538
+ "items": "string"
7539
+ }
7540
+ ],
7541
+ "name": "trainingJobs",
7542
+ "default": null,
7543
+ "doc": "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect."
7544
+ },
7545
+ {
7546
+ "Relationship": {
7547
+ "/*": {
7548
+ "entityTypes": [
7549
+ "dataJob",
7550
+ "dataProcessInstance"
7551
+ ],
7552
+ "isLineage": true,
7553
+ "isUpstream": false,
7554
+ "name": "UsedBy"
7555
+ }
7556
+ },
7557
+ "type": [
7558
+ "null",
7559
+ {
7560
+ "type": "array",
7561
+ "items": "string"
7562
+ }
7563
+ ],
7564
+ "name": "downstreamJobs",
7565
+ "default": null,
7566
+ "doc": "List of jobs or process instances (if any) that use the model or group."
7567
+ },
7568
+ {
7569
+ "Searchable": {
7570
+ "boostScore": 10.0,
7571
+ "enableAutocomplete": true,
7572
+ "fieldType": "WORD_GRAM",
7573
+ "queryByDefault": true
7574
+ },
7575
+ "type": [
7576
+ "null",
7577
+ "string"
7578
+ ],
7579
+ "name": "name",
7580
+ "default": null,
7581
+ "doc": "Display name of the MLModelGroup"
7582
+ },
7471
7583
  {
7472
7584
  "Searchable": {
7473
7585
  "fieldType": "TEXT",
@@ -7482,6 +7594,7 @@
7482
7594
  "doc": "Documentation of the MLModelGroup"
7483
7595
  },
7484
7596
  {
7597
+ "deprecated": true,
7485
7598
  "type": [
7486
7599
  "null",
7487
7600
  "long"
@@ -7490,6 +7603,24 @@
7490
7603
  "default": null,
7491
7604
  "doc": "Date when the MLModelGroup was developed"
7492
7605
  },
7606
+ {
7607
+ "type": [
7608
+ "null",
7609
+ "com.linkedin.pegasus2avro.common.TimeStamp"
7610
+ ],
7611
+ "name": "created",
7612
+ "default": null,
7613
+ "doc": "Time and Actor who created the MLModelGroup"
7614
+ },
7615
+ {
7616
+ "type": [
7617
+ "null",
7618
+ "com.linkedin.pegasus2avro.common.TimeStamp"
7619
+ ],
7620
+ "name": "lastModified",
7621
+ "default": null,
7622
+ "doc": "Date when the MLModelGroup was last modified"
7623
+ },
7493
7624
  {
7494
7625
  "type": [
7495
7626
  "null",
@@ -3027,7 +3027,7 @@
3027
3027
  "doc": "The adjustment algorithm to use when determining the threshold for an assertion."
3028
3028
  },
3029
3029
  "name": "algorithm",
3030
- "doc": "The algorithm to use to adjust assertion values to power the sensitivity control feature"
3030
+ "doc": "The algorithm to use to adjust assertion values to power the sensitivity control feature\n\nNote that for algorithm \"STDDEV\" which leverages the standard deviation of trailing values,\nthe \"stdDev\" field must be provided inside of AssertionEvaluationContext.pdl by the offline pipeline."
3031
3031
  },
3032
3032
  {
3033
3033
  "type": "string",
@@ -3147,6 +3147,15 @@
3147
3147
  "name": "embeddedAssertions",
3148
3148
  "default": null,
3149
3149
  "doc": "Currently used for Smart Assertions\nAn embedded copy of the assertion used to evaluate which will overwrite the referenced assertion\nif present and if the EmbeddedAssertion's evaluationTimeWindow period is valid"
3150
+ },
3151
+ {
3152
+ "type": [
3153
+ "null",
3154
+ "float"
3155
+ ],
3156
+ "name": "stdDev",
3157
+ "default": null,
3158
+ "doc": "The std deviation of the metric values used for training.\nThis is used to determine the final adjusted threshold for the assertion."
3150
3159
  }
3151
3160
  ],
3152
3161
  "doc": "Additional context about assertion being evaluated."
@@ -7,6 +7,7 @@
7
7
  "namespace": "com.linkedin.pegasus2avro.post",
8
8
  "fields": [
9
9
  {
10
+ "Searchable": {},
10
11
  "type": {
11
12
  "type": "enum",
12
13
  "symbolDocs": {
@@ -205,9 +206,20 @@
205
206
  "dataset",
206
207
  "schemaField",
207
208
  "chart",
209
+ "container",
208
210
  "dashboard",
209
211
  "dataFlow",
210
212
  "dataJob",
213
+ "dataProduct",
214
+ "glossaryTerm",
215
+ "glossaryNode",
216
+ "mlModel",
217
+ "mlFeature",
218
+ "notebook",
219
+ "mlFeatureTable",
220
+ "mlPrimaryKey",
221
+ "mlModelGroup",
222
+ "domain",
211
223
  "dataProduct"
212
224
  ],
213
225
  "name": "PostTarget"
@@ -228,9 +240,20 @@
228
240
  "dataset",
229
241
  "schemaField",
230
242
  "chart",
243
+ "container",
231
244
  "dashboard",
232
245
  "dataFlow",
233
246
  "dataJob",
247
+ "dataProduct",
248
+ "glossaryTerm",
249
+ "glossaryNode",
250
+ "mlModel",
251
+ "mlFeature",
252
+ "notebook",
253
+ "mlFeatureTable",
254
+ "mlPrimaryKey",
255
+ "mlModelGroup",
256
+ "domain",
234
257
  "dataProduct"
235
258
  ]
236
259
  }
@@ -130,6 +130,7 @@
130
130
  "type": "enum",
131
131
  "symbolDocs": {
132
132
  "ANCESTORS_INCL": "Represent the relation: URN field matches any nested parent in addition to the given URN",
133
+ "BETWEEN": "Represent the relation within an inclusive range, e.g. 3 <= ownerCount <= 5. Note that the values in Criterion must have two entries for a lower and upper bound.",
133
134
  "CONTAIN": "Represent the relation: String field contains value, e.g. name contains Profile",
134
135
  "DESCENDANTS_INCL": "Represent the relation: URN field any nested children in addition to the given URN",
135
136
  "END_WITH": "Represent the relation: String field ends with value, e.g. name ends with Event",
@@ -159,6 +160,7 @@
159
160
  "IN",
160
161
  "LESS_THAN",
161
162
  "LESS_THAN_OR_EQUAL_TO",
163
+ "BETWEEN",
162
164
  "START_WITH",
163
165
  "DESCENDANTS_INCL",
164
166
  "ANCESTORS_INCL",
@@ -0,0 +1,21 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "dataHubRemoteExecutorKey",
5
+ "keyForEntity": "dataHubRemoteExecutor",
6
+ "entityCategory": "internal",
7
+ "entityAspects": [
8
+ "dataHubRemoteExecutorStatus"
9
+ ]
10
+ },
11
+ "name": "RemoteExecutorKey",
12
+ "namespace": "com.linkedin.pegasus2avro.metadata.key",
13
+ "fields": [
14
+ {
15
+ "type": "string",
16
+ "name": "id",
17
+ "doc": "A unique id for the DataHub Remote Executor."
18
+ }
19
+ ],
20
+ "doc": "Key for an DataHub Remote Executor"
21
+ }
@@ -0,0 +1,80 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "dataHubRemoteExecutorStatus"
5
+ },
6
+ "name": "RemoteExecutorStatus",
7
+ "namespace": "com.linkedin.pegasus2avro.executor",
8
+ "fields": [
9
+ {
10
+ "Searchable": {
11
+ "fieldType": "KEYWORD"
12
+ },
13
+ "type": "string",
14
+ "name": "executorId",
15
+ "doc": "TODO: rename to pipeline id"
16
+ },
17
+ {
18
+ "type": "string",
19
+ "name": "executorReleaseVersion",
20
+ "doc": " Release Version (Tag) embedded into remote executor image"
21
+ },
22
+ {
23
+ "type": "string",
24
+ "name": "executorAddress",
25
+ "doc": " IP address of the host where remote executor is running"
26
+ },
27
+ {
28
+ "type": "string",
29
+ "name": "executorHostname",
30
+ "doc": "Hostname of the host where remote executor is running"
31
+ },
32
+ {
33
+ "type": "float",
34
+ "name": "executorUptime",
35
+ "doc": "Uptime of the remote executor master process"
36
+ },
37
+ {
38
+ "Searchable": {
39
+ "fieldType": "BOOLEAN"
40
+ },
41
+ "type": "boolean",
42
+ "name": "executorExpired",
43
+ "default": false,
44
+ "doc": "Flag indicating whether remote executor status record is stale."
45
+ },
46
+ {
47
+ "type": "boolean",
48
+ "name": "executorStopped",
49
+ "default": false,
50
+ "doc": "Flag indicating whether remote executor is stopped."
51
+ },
52
+ {
53
+ "type": "boolean",
54
+ "name": "executorEmbedded",
55
+ "default": false,
56
+ "doc": "Flag indicating whether remote executor is embedded executor"
57
+ },
58
+ {
59
+ "type": "boolean",
60
+ "name": "executorInternal",
61
+ "default": false,
62
+ "doc": "Flag indicating whether remote executor is Acryl managed executor"
63
+ },
64
+ {
65
+ "type": "boolean",
66
+ "name": "logDeliveryEnabled",
67
+ "default": false,
68
+ "doc": "Flag indicating whether log delivery is enabled by the customer in the given\nremote executor instance."
69
+ },
70
+ {
71
+ "Searchable": {
72
+ "fieldType": "COUNT",
73
+ "queryByDefault": false
74
+ },
75
+ "type": "int",
76
+ "name": "reportedAt",
77
+ "doc": "UTC-based timestamp of the last reported status"
78
+ }
79
+ ]
80
+ }
@@ -14,7 +14,8 @@
14
14
  "documentation",
15
15
  "testResults",
16
16
  "schemaFieldProfile",
17
- "lineageFeatures"
17
+ "lineageFeatures",
18
+ "deprecation"
18
19
  ]
19
20
  },
20
21
  "name": "SchemaFieldKey",