acryl-datahub-cloud 0.3.7.9.1__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (64)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/acryl_cs_issues/source.py +0 -1
  3. acryl_datahub_cloud/api/__init__.py +1 -0
  4. acryl_datahub_cloud/api/client.py +6 -0
  5. acryl_datahub_cloud/api/entity_versioning.py +167 -0
  6. acryl_datahub_cloud/datahub_metadata_sharing/__init__.py +0 -0
  7. acryl_datahub_cloud/datahub_metadata_sharing/metadata_sharing_source.py +267 -0
  8. acryl_datahub_cloud/datahub_metadata_sharing/query.py +7 -0
  9. acryl_datahub_cloud/datahub_metadata_sharing/scroll_shared_entities.gql +204 -0
  10. acryl_datahub_cloud/datahub_metadata_sharing/share_entity.gql +9 -0
  11. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +0 -2
  12. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +0 -1
  13. acryl_datahub_cloud/datahub_reporting/extract_graph.py +0 -1
  14. acryl_datahub_cloud/datahub_reporting/extract_sql.py +0 -1
  15. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +79 -57
  16. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +284 -258
  17. acryl_datahub_cloud/lineage_features/source.py +22 -5
  18. acryl_datahub_cloud/metadata/_urns/urn_defs.py +1593 -1494
  19. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
  20. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executor/__init__.py +15 -0
  21. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  22. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
  23. acryl_datahub_cloud/metadata/schema.avsc +20140 -19735
  24. acryl_datahub_cloud/metadata/schema_classes.py +1083 -486
  25. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +1 -1
  26. acryl_datahub_cloud/metadata/schemas/AssertionInferenceDetails.avsc +1 -1
  27. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +1 -1
  28. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +1 -1
  29. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  30. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  31. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceInfo.avsc +6 -0
  32. acryl_datahub_cloud/metadata/schemas/DataHubViewInfo.avsc +2 -0
  33. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +3 -1
  34. acryl_datahub_cloud/metadata/schemas/DataProcessInstanceInput.avsc +2 -4
  35. acryl_datahub_cloud/metadata/schemas/DataProcessInstanceOutput.avsc +0 -2
  36. acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +63 -0
  37. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +2 -1
  38. acryl_datahub_cloud/metadata/schemas/Deprecation.avsc +12 -0
  39. acryl_datahub_cloud/metadata/schemas/DynamicFormAssignment.avsc +2 -0
  40. acryl_datahub_cloud/metadata/schemas/EntityTypeKey.avsc +1 -0
  41. acryl_datahub_cloud/metadata/schemas/ExecutionRequestInput.avsc +9 -0
  42. acryl_datahub_cloud/metadata/schemas/ExecutionRequestResult.avsc +14 -0
  43. acryl_datahub_cloud/metadata/schemas/Filter.avsc +2 -0
  44. acryl_datahub_cloud/metadata/schemas/MLFeatureProperties.avsc +51 -0
  45. acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
  46. acryl_datahub_cloud/metadata/schemas/MLModelGroupProperties.avsc +51 -0
  47. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +2 -1
  48. acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +51 -0
  49. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
  50. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +20 -0
  51. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +10 -1
  52. acryl_datahub_cloud/metadata/schemas/PostInfo.avsc +23 -0
  53. acryl_datahub_cloud/metadata/schemas/RecommendationModule.avsc +2 -0
  54. acryl_datahub_cloud/metadata/schemas/RemoteExecutorKey.avsc +21 -0
  55. acryl_datahub_cloud/metadata/schemas/RemoteExecutorStatus.avsc +80 -0
  56. acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +2 -1
  57. acryl_datahub_cloud/metadata/schemas/VersionProperties.avsc +216 -0
  58. acryl_datahub_cloud/metadata/schemas/VersionSetKey.avsc +26 -0
  59. acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc +49 -0
  60. {acryl_datahub_cloud-0.3.7.9.1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/METADATA +52 -44
  61. {acryl_datahub_cloud-0.3.7.9.1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/RECORD +64 -48
  62. {acryl_datahub_cloud-0.3.7.9.1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/entry_points.txt +1 -0
  63. {acryl_datahub_cloud-0.3.7.9.1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/WHEEL +0 -0
  64. {acryl_datahub_cloud-0.3.7.9.1.dist-info → acryl_datahub_cloud-0.3.8.dist-info}/top_level.txt +0 -0
@@ -180,7 +180,6 @@ class DataHubFormReportingData(FormData):
180
180
  def form_assigned_date(
181
181
  self, search_row: DataHubDatasetSearchRow
182
182
  ) -> Dict[str, date]:
183
-
184
183
  form_assigned_dates: Dict[str, date] = {}
185
184
  forms = self.graph.get_aspect(search_row.urn, FormsClass)
186
185
  if not forms:
@@ -118,7 +118,6 @@ class DataHubReportingExtractGraphSource(Source):
118
118
  return skip_extract
119
119
 
120
120
  def get_workunits(self):
121
-
122
121
  self.graph = (
123
122
  self.ctx.require_graph("Loading default graph coordinates.")
124
123
  if self.config.server is None
@@ -118,7 +118,6 @@ class DataHubReportingExtractSQLSource(Source):
118
118
  return skip_extract
119
119
 
120
120
  def get_workunits(self):
121
-
122
121
  self.graph = (
123
122
  self.ctx.require_graph("Loading default graph coordinates.")
124
123
  if self.config.server is None
@@ -3,24 +3,31 @@ from typing import Dict
3
3
 
4
4
  class QueryBuilder:
5
5
  @staticmethod
6
- def get_soft_deleted_entities_query() -> Dict:
6
+ def get_dataset_entities_query() -> Dict:
7
7
  return {
8
8
  "sort": [{"urn": {"order": "asc"}}],
9
+ "_source": {
10
+ "includes": [
11
+ "urn",
12
+ "lastModifiedAt",
13
+ "removed",
14
+ "siblings",
15
+ "typeNames",
16
+ ]
17
+ },
9
18
  }
10
19
 
11
20
  @staticmethod
12
21
  def get_query_entities_query() -> Dict:
13
22
  return {
14
23
  "sort": [{"urn": {"order": "asc"}}],
24
+ "_source": {"includes": ["urn", "lastModifiedAt", "platform", "removed"]},
15
25
  "query": {
16
26
  "bool": {
17
- "filter": {
18
- "bool": {
19
- "must_not": [
20
- {"term": {"source": "MANUAL"}},
21
- ]
22
- }
23
- }
27
+ "filter": [
28
+ {"bool": {"must_not": [{"term": {"source": "MANUAL"}}]}},
29
+ {"exists": {"field": "platform"}},
30
+ ]
24
31
  }
25
32
  },
26
33
  }
@@ -29,6 +36,7 @@ class QueryBuilder:
29
36
  def get_upstreams_query() -> Dict:
30
37
  return {
31
38
  "sort": [{"destination.urn": {"order": "asc"}}],
39
+ "_source": {"includes": ["source.urn", "destination.urn"]},
32
40
  "query": {
33
41
  "bool": {
34
42
  "must": [
@@ -43,23 +51,27 @@ class QueryBuilder:
43
51
  def get_dashboard_usage_query(days: int) -> Dict:
44
52
  return {
45
53
  "sort": [{"urn": {"order": "asc"}}],
54
+ "_source": {
55
+ "includes": [
56
+ "timestampMillis",
57
+ "systemMetadata.lastObserved",
58
+ "urn",
59
+ "eventGranularity",
60
+ "viewsCount",
61
+ "uniqueUserCount",
62
+ "event.userCounts",
63
+ ]
64
+ },
46
65
  "query": {
47
66
  "bool": {
48
- "filter": {
49
- "bool": {
50
- "must": [
51
- {
52
- "range": {
53
- "@timestamp": {
54
- "gte": f"now-{days}d",
55
- "lt": "now/d",
56
- }
57
- }
58
- },
59
- {"term": {"isExploded": False}},
60
- ]
61
- }
62
- }
67
+ "filter": [
68
+ {
69
+ "range": {
70
+ "@timestamp": {"gte": f"now-{days}d", "lt": "now/d"}
71
+ }
72
+ },
73
+ {"term": {"isExploded": False}},
74
+ ]
63
75
  }
64
76
  },
65
77
  }
@@ -68,24 +80,28 @@ class QueryBuilder:
68
80
  def get_dataset_usage_query(days: int) -> Dict:
69
81
  return {
70
82
  "sort": [{"urn": {"order": "asc"}}],
83
+ "_source": {
84
+ "includes": [
85
+ "timestampMillis",
86
+ "urn",
87
+ "eventGranularity",
88
+ "totalSqlQueries",
89
+ "uniqueUserCount",
90
+ "event.userCounts",
91
+ "platform",
92
+ ]
93
+ },
71
94
  "query": {
72
95
  "bool": {
73
- "filter": {
74
- "bool": {
75
- "must": [
76
- {
77
- "range": {
78
- "@timestamp": {
79
- "gte": f"now-{days}d/d",
80
- "lt": "now/d",
81
- }
82
- }
83
- },
84
- {"term": {"isExploded": False}},
85
- {"range": {"totalSqlQueries": {"gt": 0}}},
86
- ]
87
- }
88
- }
96
+ "filter": [
97
+ {
98
+ "range": {
99
+ "@timestamp": {"gte": f"now-{days}d/d", "lt": "now/d"}
100
+ }
101
+ },
102
+ {"term": {"isExploded": False}},
103
+ {"range": {"totalSqlQueries": {"gt": 0}}},
104
+ ]
89
105
  }
90
106
  },
91
107
  }
@@ -94,6 +110,11 @@ class QueryBuilder:
94
110
  def get_dataset_write_usage_raw_query(days: int) -> Dict:
95
111
  return {
96
112
  "sort": [{"urn": {"order": "asc"}}, {"@timestamp": {"order": "asc"}}],
113
+ "_source": {
114
+ "includes": [
115
+ "urn" # Only field needed for platform extraction via regex
116
+ ]
117
+ },
97
118
  "query": {
98
119
  "bool": {
99
120
  "must": [
@@ -106,9 +127,6 @@ class QueryBuilder:
106
127
  ]
107
128
  }
108
129
  },
109
- "_source": {
110
- "includes": ["urn", "@timestamp"],
111
- },
112
130
  }
113
131
 
114
132
  @staticmethod
@@ -141,23 +159,27 @@ class QueryBuilder:
141
159
  def get_query_usage_query(days: int) -> Dict:
142
160
  return {
143
161
  "sort": [{"urn": {"order": "asc"}}],
162
+ "_source": {
163
+ "includes": [
164
+ "timestampMillis",
165
+ "systemMetadata.lastObserved",
166
+ "urn",
167
+ "eventGranularity",
168
+ "queryCount",
169
+ "uniqueUserCount",
170
+ "event.userCounts",
171
+ ]
172
+ },
144
173
  "query": {
145
174
  "bool": {
146
- "filter": {
147
- "bool": {
148
- "must": [
149
- {
150
- "range": {
151
- "@timestamp": {
152
- "gte": f"now-{days}d/d",
153
- "lt": "now/d",
154
- }
155
- }
156
- },
157
- {"term": {"isExploded": False}},
158
- ]
159
- }
160
- }
175
+ "filter": [
176
+ {
177
+ "range": {
178
+ "@timestamp": {"gte": f"now-{days}d/d", "lt": "now/d"}
179
+ }
180
+ },
181
+ {"term": {"isExploded": False}},
182
+ ]
161
183
  }
162
184
  },
163
185
  }