acryl-datahub-cloud 0.3.10rc4__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (243)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
  3. acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
  4. acryl_datahub_cloud/action_request/action_request_owner_source.py +37 -8
  5. acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
  7. acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
  8. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  9. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  10. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  11. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  12. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +39 -19
  13. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +60 -25
  14. acryl_datahub_cloud/datahub_reporting/extract_graph.py +9 -3
  15. acryl_datahub_cloud/datahub_reporting/extract_sql.py +248 -52
  16. acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
  17. acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
  18. acryl_datahub_cloud/datahub_restore/source.py +3 -2
  19. acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
  20. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  21. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +532 -109
  22. acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
  23. acryl_datahub_cloud/graphql_utils.py +64 -0
  24. acryl_datahub_cloud/lineage_features/source.py +555 -49
  25. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2390 -1938
  26. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
  27. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
  28. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  29. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +6 -2
  30. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
  31. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
  32. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
  33. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
  34. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  35. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
  36. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
  37. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
  38. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  39. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +14 -0
  40. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
  41. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  42. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/monitor/__init__.py +6 -0
  43. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  44. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  45. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  46. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  47. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
  48. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  49. acryl_datahub_cloud/metadata/schema.avsc +27843 -23200
  50. acryl_datahub_cloud/metadata/schema_classes.py +29901 -24310
  51. acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
  52. acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
  53. acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
  54. acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
  55. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  56. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
  57. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  58. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +375 -212
  59. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
  60. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  61. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +191 -21
  62. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
  63. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
  64. acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
  65. acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
  66. acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
  67. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  68. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  69. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  70. acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
  71. acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
  72. acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
  73. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  74. acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
  75. acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  76. acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
  77. acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
  78. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
  79. acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
  80. acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
  81. acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
  82. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  83. acryl_datahub_cloud/metadata/schemas/DataContractKey.avsc +2 -1
  84. acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
  85. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  86. acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
  87. acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
  88. acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
  89. acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
  90. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  91. acryl_datahub_cloud/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  92. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  93. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
  94. acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  95. acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  96. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  97. acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
  98. acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
  99. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  100. acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
  101. acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
  102. acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
  103. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
  104. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
  105. acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +4 -2
  106. acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
  107. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
  108. acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
  109. acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  110. acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
  111. acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
  112. acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
  113. acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
  114. acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
  115. acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
  116. acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
  117. acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
  118. acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
  119. acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
  120. acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
  121. acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
  122. acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
  123. acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
  124. acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
  125. acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
  126. acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
  127. acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
  128. acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
  129. acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
  130. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  131. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  132. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
  133. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  134. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
  135. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
  136. acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
  137. acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
  138. acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
  139. acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
  140. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
  141. acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
  142. acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
  143. acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
  144. acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
  145. acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
  146. acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
  147. acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
  148. acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
  149. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
  150. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
  151. acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
  152. acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  153. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
  154. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
  155. acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
  156. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
  157. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +424 -97
  158. acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +65 -44
  159. acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  160. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +84 -29
  161. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +221 -23
  162. acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
  163. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
  164. acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
  165. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  166. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
  167. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  168. acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
  169. acryl_datahub_cloud/metadata/schemas/QueryProperties.avsc +4 -2
  170. acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
  171. acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  172. acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
  173. acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
  174. acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
  175. acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
  176. acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
  177. acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
  178. acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
  179. acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  180. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
  181. acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
  182. acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +147 -0
  183. acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
  184. acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
  185. acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
  186. acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
  187. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  188. acryl_datahub_cloud/notifications/__init__.py +0 -0
  189. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  190. acryl_datahub_cloud/sdk/__init__.py +69 -0
  191. acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
  192. acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
  193. acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
  194. acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
  195. acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
  196. acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
  197. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
  198. acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
  199. acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
  200. acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
  201. acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
  202. acryl_datahub_cloud/sdk/assertion/types.py +20 -0
  203. acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
  204. acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
  205. acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
  206. acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
  207. acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
  208. acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
  209. acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
  210. acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
  211. acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
  212. acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
  213. acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
  214. acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
  215. acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
  216. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  217. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
  218. acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
  219. acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
  220. acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
  221. acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
  222. acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
  223. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
  224. acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
  225. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
  226. acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
  227. acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
  228. acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
  229. acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
  230. acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
  231. acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
  232. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  233. acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
  234. acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
  235. acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
  236. acryl_datahub_cloud/sdk/errors.py +34 -0
  237. acryl_datahub_cloud/sdk/resolver_client.py +42 -0
  238. acryl_datahub_cloud/sdk/subscription_client.py +737 -0
  239. {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +49 -43
  240. {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +243 -145
  241. {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
  242. {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
  243. {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
@@ -5,14 +5,14 @@ import pathlib
5
5
  import tempfile
6
6
  import time
7
7
  from enum import Enum
8
- from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
8
+ from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union, cast
9
9
 
10
10
  import boto3
11
11
  import duckdb
12
12
  import pandas
13
13
  import pyarrow as pa
14
14
  import pyarrow.parquet as pq
15
- from pydantic import BaseModel, validator
15
+ from pydantic import BaseModel, field_validator
16
16
 
17
17
  from acryl_datahub_cloud.elasticsearch.graph_service import BaseModelRow, SchemaField
18
18
  from datahub.configuration.common import ConfigModel
@@ -73,7 +73,9 @@ class FileStoreBackedDatasetConfig(ConfigModel):
73
73
  store_platform: str = "s3"
74
74
  file_name: str = "data"
75
75
  file_extension: str = "parquet"
76
- file_compression: str = "snappy"
76
+ file_compression: Literal[
77
+ "gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"
78
+ ] = "snappy"
77
79
  file_overwrite_existing: bool = True
78
80
  snapshot_partitioning_strategy: str = PartitioningStrategy.DATE
79
81
  generate_presigned_url: bool = True
@@ -85,7 +87,8 @@ class FileStoreBackedDatasetConfig(ConfigModel):
85
87
 
86
88
  datahub_platform: str = "acryl"
87
89
 
88
- @validator("snapshot_partitioning_strategy")
90
+ @field_validator("snapshot_partitioning_strategy")
91
+ @classmethod
89
92
  def validate_partitioning_strategy(cls, v):
90
93
  if v not in PartitioningStrategy._value2member_map_:
91
94
  raise ValueError(f"Unsupported partitioning strategy: {v}")
@@ -119,9 +122,14 @@ class DataHubBasedS3Dataset:
119
122
  self.local_file_path: str = (
120
123
  config.file if config.file else self._initialize_local_file()
121
124
  )
122
- self.file_writer = None
125
+ self.file_writer: Optional[pq.ParquetWriter] = None
123
126
  self.schema = (
124
- pa.schema([(x.name, x.type) for x in self.dataset_metadata.schemaFields])
127
+ pa.schema(
128
+ [
129
+ pa.field(x.name, BaseModelRow.string_to_pyarrow_type(x.type))
130
+ for x in self.dataset_metadata.schemaFields
131
+ ]
132
+ )
125
133
  if self.dataset_metadata.schemaFields
126
134
  else None
127
135
  )
@@ -163,18 +171,32 @@ class DataHubBasedS3Dataset:
163
171
  self.schema = row.arrow_schema()
164
172
  else:
165
173
  # hail mary: infer schema from the first row and cast everything to string
166
- self.schema = pa.schema([(key, pa.string()) for key in row.keys()])
174
+ self.schema = pa.schema([pa.field(key, pa.string()) for key in row])
167
175
  self.stringify_row = True
168
176
 
169
177
  self._initialize_local_file()
178
+ # Map compression names to PyArrow format (most are direct mappings)
179
+ compression_map = {
180
+ "gzip": "gzip",
181
+ "bz2": "brotli", # PyArrow doesn't support bz2, use brotli
182
+ "brotli": "brotli",
183
+ "lz4": "lz4",
184
+ "zstd": "zstd",
185
+ "snappy": "snappy",
186
+ "none": "none",
187
+ }
188
+ compression = cast(
189
+ Literal["gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"],
190
+ compression_map.get(self.config.file_compression, "snappy"),
191
+ )
170
192
  self.file_writer = pq.ParquetWriter(
171
193
  self.local_file_path,
172
194
  self.schema,
173
- compression=self.config.file_compression,
195
+ compression=compression,
174
196
  )
175
- if isinstance(row, BaseModel) or isinstance(row, BaseModelRow):
197
+ if isinstance(row, (BaseModel, BaseModelRow)):
176
198
  # for anything extending BaseModel, we want to use the dict representation
177
- write_row: Dict[str, Any] = row.dict()
199
+ write_row: Dict[str, Any] = row.model_dump()
178
200
  elif isinstance(row, dict):
179
201
  write_row = row
180
202
  else:
@@ -271,14 +293,10 @@ class DataHubBasedS3Dataset:
271
293
  )
272
294
 
273
295
  def _generate_schema_metadata(
274
- self, duckdb_columns: List[Tuple[str, str]]
296
+ self, duckdb_columns: List[Tuple[str, Any]]
275
297
  ) -> SchemaMetadataClass:
276
298
  def get_type_from_dtype(dtype: str) -> SchemaFieldDataTypeClass:
277
- if "int" in dtype:
278
- return SchemaFieldDataTypeClass(type=NumberTypeClass())
279
- elif "float" in dtype:
280
- return SchemaFieldDataTypeClass(type=NumberTypeClass())
281
- elif "number" in dtype:
299
+ if "int" in dtype or "float" in dtype or "number" in dtype:
282
300
  return SchemaFieldDataTypeClass(type=NumberTypeClass())
283
301
  elif "bool" in dtype:
284
302
  return SchemaFieldDataTypeClass(type=BooleanTypeClass())
@@ -306,7 +324,7 @@ class DataHubBasedS3Dataset:
306
324
  )
307
325
  for column in duckdb_columns:
308
326
  # generate data type
309
- data_type = column[1].lower()
327
+ data_type = str(column[1]).lower()
310
328
  schema_metadata.fields.append(
311
329
  SchemaFieldClass(
312
330
  fieldPath=column[0],
@@ -345,7 +363,7 @@ class DataHubBasedS3Dataset:
345
363
  # generate min, max, avg, distinct count, null count
346
364
  column_name = column[0]
347
365
  logger.info(f"Generating field profile for {column_name}")
348
- data_type = column[1].lower()
366
+ data_type = str(column[1]).lower()
349
367
  if "int" in data_type or "float" in data_type:
350
368
  query = (
351
369
  f"SELECT COUNT(DISTINCT {column_name}), COUNT(*) - COUNT({column_name}), MIN({column_name}), MAX({column_name}), AVG({column_name})"
@@ -400,7 +418,9 @@ class DataHubBasedS3Dataset:
400
418
  assert dataset_profiles.fieldProfiles is not None
401
419
  dataset_profiles.fieldProfiles.append(field_profile)
402
420
  logger.info("Generated dataset profile")
403
- schema_metadata = self._generate_schema_metadata(columns)
421
+ schema_metadata = self._generate_schema_metadata(
422
+ [(col[0], col[1]) for col in columns]
423
+ )
404
424
  return dataset_profiles, schema_metadata
405
425
 
406
426
  def register_dataset(
@@ -1,13 +1,16 @@
1
- import json
2
1
  import logging
3
2
  from datetime import date, datetime, timezone
4
3
  from enum import Enum
5
- from typing import Any, Callable, Dict, Iterable, List, Optional
4
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Union
6
5
 
7
6
  import pandas as pd
7
+ from pydantic import BaseModel, field_validator
8
+
9
+ from acryl_datahub_cloud.elasticsearch.graph_service import BaseModelRow
10
+ from acryl_datahub_cloud.graphql_utils import parse_extra_properties_for_model
8
11
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
9
12
  from datahub.ingestion.graph.client import DataHubGraph
10
- from datahub.ingestion.graph.filters import RawSearchFilterRule
13
+ from datahub.ingestion.graph.filters import RawSearchFilter
11
14
  from datahub.metadata.schema_classes import (
12
15
  DomainPropertiesClass,
13
16
  FormAssociationClass,
@@ -16,9 +19,6 @@ from datahub.metadata.schema_classes import (
16
19
  FormStateClass,
17
20
  FormTypeClass,
18
21
  )
19
- from pydantic import BaseModel
20
-
21
- from acryl_datahub_cloud.elasticsearch.graph_service import BaseModelRow
22
22
 
23
23
  logger = logging.getLogger(__name__)
24
24
 
@@ -130,6 +130,22 @@ class DataHubFormReportingData(FormData):
130
130
  platformInstance: Optional[str] = None
131
131
  domains: List[str] = []
132
132
 
133
+ @field_validator(
134
+ "completedFormsIncompletePromptResponseTimes",
135
+ "completedFormsCompletedPromptResponseTimes",
136
+ "incompleteFormsIncompletePromptResponseTimes",
137
+ "incompleteFormsCompletedPromptResponseTimes",
138
+ mode="before",
139
+ )
140
+ @classmethod
141
+ def convert_timestamps_to_strings(
142
+ cls, v: Union[List[int], List[str]]
143
+ ) -> List[str]:
144
+ """Convert timestamp integers to strings for compatibility with GMS data."""
145
+ if not isinstance(v, list):
146
+ return v
147
+ return [str(item) for item in v]
148
+
133
149
  def __init__(self, graph: DataHubGraph, allowed_forms: Optional[List[str]] = None):
134
150
  self.graph: DataHubGraph = graph
135
151
  self.form_registry = FormRegistry(graph)
@@ -143,13 +159,13 @@ class DataHubFormReportingData(FormData):
143
159
  on_form_scanned: Callable[[str], Any],
144
160
  ) -> pd.DataFrame:
145
161
  return pd.DataFrame(
146
- x.dict()
162
+ x.model_dump()
147
163
  for x in self.get_data(
148
164
  on_asset_scanned=on_asset_scanned, on_form_scanned=on_form_scanned
149
165
  )
150
166
  )
151
167
 
152
- def get_form_existence_or_filters(self) -> List[RawSearchFilterRule]:
168
+ def get_form_existence_or_filters(self) -> RawSearchFilter:
153
169
  """
154
170
  Datasets must either have completedForms or incompleteForms assigned to
155
171
  them
@@ -157,25 +173,41 @@ class DataHubFormReportingData(FormData):
157
173
  if self.allowed_forms:
158
174
  return [
159
175
  {
160
- "field": "completedForms",
161
- "condition": "EQUAL",
162
- "values": self.allowed_forms,
176
+ "and": [
177
+ {
178
+ "field": "completedForms",
179
+ "condition": "EQUAL",
180
+ "values": self.allowed_forms,
181
+ }
182
+ ]
163
183
  },
164
184
  {
165
- "field": "incompleteForms",
166
- "condition": "EQUAL",
167
- "values": self.allowed_forms,
185
+ "and": [
186
+ {
187
+ "field": "incompleteForms",
188
+ "condition": "EQUAL",
189
+ "values": self.allowed_forms,
190
+ }
191
+ ]
168
192
  },
169
193
  ]
170
194
  else:
171
195
  return [
172
196
  {
173
- "field": "completedForms",
174
- "condition": "EXISTS",
197
+ "and": [
198
+ {
199
+ "field": "completedForms",
200
+ "condition": "EXISTS",
201
+ }
202
+ ]
175
203
  },
176
204
  {
177
- "field": "incompleteForms",
178
- "condition": "EXISTS",
205
+ "and": [
206
+ {
207
+ "field": "incompleteForms",
208
+ "condition": "EXISTS",
209
+ }
210
+ ]
179
211
  },
180
212
  ]
181
213
 
@@ -257,6 +289,7 @@ class DataHubFormReportingData(FormData):
257
289
  for prompt_id, response_time in zip(
258
290
  search_row.completedFormsCompletedPromptIds,
259
291
  search_row.completedFormsCompletedPromptResponseTimes,
292
+ strict=False,
260
293
  )
261
294
  if prompt_id in form_prompts
262
295
  }
@@ -289,7 +322,8 @@ class DataHubFormReportingData(FormData):
289
322
  on_asset_scanned: Optional[Callable[[str], Any]] = None,
290
323
  on_form_scanned: Optional[Callable[[str], Any]] = None,
291
324
  ) -> Iterable[FormReportingRow]:
292
- extra_fields = [f for f in self.DataHubDatasetSearchRow.__fields__.keys()]
325
+ extra_fields = [f for f in self.DataHubDatasetSearchRow.model_fields]
326
+ # TODO: Replace with the new search/filter SDK.
293
327
  result = self.graph.get_results_by_filter(
294
328
  extra_or_filters=self.get_form_existence_or_filters(),
295
329
  extra_source_fields=extra_fields,
@@ -302,10 +336,9 @@ class DataHubFormReportingData(FormData):
302
336
  if row_index % 100 == 0:
303
337
  logger.info(f"Scanned {row_index} assets")
304
338
  extra_properties = row["extraProperties"]
305
-
306
- extra_properties_map = {
307
- x["name"]: json.loads(x["value"]) for x in extra_properties
308
- }
339
+ extra_properties_map = parse_extra_properties_for_model(
340
+ extra_properties, self.DataHubDatasetSearchRow
341
+ )
309
342
  search_row = self.DataHubDatasetSearchRow(**extra_properties_map)
310
343
  if on_asset_scanned:
311
344
  on_asset_scanned(search_row.urn)
@@ -388,6 +421,7 @@ class DataHubFormReportingData(FormData):
388
421
  for (p, p_response_time) in zip(
389
422
  search_row.incompleteFormsCompletedPromptIds,
390
423
  search_row.incompleteFormsCompletedPromptResponseTimes,
424
+ strict=False,
391
425
  )
392
426
  if p in form_prompts
393
427
  ]:
@@ -411,7 +445,7 @@ class DataHubFormReportingData(FormData):
411
445
  question_status=QuestionStatus.COMPLETED,
412
446
  question_completed_date=datetime.fromtimestamp(
413
447
  float(prompt_response_time) / 1000, tz=timezone.utc
414
- ),
448
+ ).date(),
415
449
  snapshot_date=self.snapshot_date,
416
450
  )
417
451
  complete_forms = (
@@ -485,6 +519,7 @@ class DataHubFormReportingData(FormData):
485
519
  for (p, p_response_time) in zip(
486
520
  search_row.completedFormsCompletedPromptIds,
487
521
  search_row.completedFormsCompletedPromptResponseTimes,
522
+ strict=False,
488
523
  )
489
524
  if p in form_prompts
490
525
  ]:
@@ -512,7 +547,7 @@ class DataHubFormReportingData(FormData):
512
547
  question_status=QuestionStatus.COMPLETED,
513
548
  question_completed_date=datetime.fromtimestamp(
514
549
  float(prompt_response_time) / 1000, tz=timezone.utc
515
- ),
550
+ ).date(),
516
551
  snapshot_date=self.snapshot_date,
517
552
  )
518
553
 
@@ -6,7 +6,7 @@ from typing import List, Optional
6
6
 
7
7
  import boto3
8
8
  from opensearchpy import OpenSearch
9
- from pydantic import validator
9
+ from pydantic import field_validator
10
10
 
11
11
  from acryl_datahub_cloud.datahub_reporting.datahub_dataset import (
12
12
  DataHubBasedS3Dataset,
@@ -32,6 +32,7 @@ logger = logging.getLogger(__name__)
32
32
 
33
33
 
34
34
  class DataHubReportingExtractGraphSourceConfig(ConfigModel):
35
+ enabled: bool = True
35
36
  server: Optional[DatahubClientConfig] = None
36
37
  search_index: ElasticSearchClientConfig = ElasticSearchClientConfig()
37
38
  extract_graph_store: FileStoreBackedDatasetConfig
@@ -42,8 +43,9 @@ class DataHubReportingExtractGraphSourceConfig(ConfigModel):
42
43
  query_timeout: int = 30
43
44
  extract_batch_size: int = 2000
44
45
 
45
- @validator("extract_graph_store", pre=True, always=True)
46
- def set_default_extract_soft_delete_flag(cls, v, values):
46
+ @field_validator("extract_graph_store", mode="before")
47
+ @classmethod
48
+ def set_default_extract_soft_delete_flag(cls, v):
47
49
  if v is not None:
48
50
  if "dataset_registration_spec" not in v:
49
51
  v["dataset_registration_spec"] = DatasetRegistrationSpec(
@@ -118,6 +120,10 @@ class DataHubReportingExtractGraphSource(Source):
118
120
  return skip_extract
119
121
 
120
122
  def get_workunits(self):
123
+ if not self.config.enabled:
124
+ logger.info("Source is disabled, stopping")
125
+ return
126
+
121
127
  self.graph = (
122
128
  self.ctx.require_graph("Loading default graph coordinates.")
123
129
  if self.config.server is None