acryl-datahub-cloud 0.3.10rc4__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
- acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
- acryl_datahub_cloud/action_request/action_request_owner_source.py +37 -8
- acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +39 -19
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +60 -25
- acryl_datahub_cloud/datahub_reporting/extract_graph.py +9 -3
- acryl_datahub_cloud/datahub_reporting/extract_sql.py +248 -52
- acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
- acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
- acryl_datahub_cloud/datahub_restore/source.py +3 -2
- acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +532 -109
- acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
- acryl_datahub_cloud/graphql_utils.py +64 -0
- acryl_datahub_cloud/lineage_features/source.py +555 -49
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2390 -1938
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +6 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +14 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/monitor/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- acryl_datahub_cloud/metadata/schema.avsc +27843 -23200
- acryl_datahub_cloud/metadata/schema_classes.py +29901 -24310
- acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +375 -212
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +191 -21
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
- acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
- acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
- acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
- acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataContractKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
- acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
- acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
- acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
- acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
- acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
- acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
- acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
- acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
- acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
- acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +424 -97
- acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +65 -44
- acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +84 -29
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +221 -23
- acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
- acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/QueryProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
- acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
- acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
- acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +147 -0
- acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
- acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +69 -0
- acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
- acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
- acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
- acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
- acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
- acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
- acryl_datahub_cloud/sdk/assertion/types.py +20 -0
- acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
- acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
- acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
- acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
- acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
- acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
- acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
- acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
- acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
- acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
- acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
- acryl_datahub_cloud/sdk/errors.py +34 -0
- acryl_datahub_cloud/sdk/resolver_client.py +42 -0
- acryl_datahub_cloud/sdk/subscription_client.py +737 -0
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +49 -43
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +243 -145
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
from typing import Any, Dict, List, Optional
|
|
2
|
+
from typing import Any, Dict, List, Optional, get_args, get_origin
|
|
3
3
|
|
|
4
4
|
import pyarrow as pa
|
|
5
5
|
from pydantic import BaseModel
|
|
@@ -15,7 +15,37 @@ class BaseModelRow(BaseModel):
|
|
|
15
15
|
return self.__dict__
|
|
16
16
|
|
|
17
17
|
@staticmethod
|
|
18
|
-
def pydantic_type_to_pyarrow(type_: Any) -> pa.DataType:
|
|
18
|
+
def pydantic_type_to_pyarrow(type_: Any) -> pa.DataType: # noqa: C901
|
|
19
|
+
# Handle generic types (List, Optional, Union, etc.)
|
|
20
|
+
origin = get_origin(type_)
|
|
21
|
+
|
|
22
|
+
if origin is list:
|
|
23
|
+
# List[X] -> list(X)
|
|
24
|
+
args = get_args(type_)
|
|
25
|
+
if args:
|
|
26
|
+
inner_type = BaseModelRow.pydantic_type_to_pyarrow(args[0])
|
|
27
|
+
return pa.list_(inner_type)
|
|
28
|
+
return pa.list_(pa.string()) # Default to list of strings
|
|
29
|
+
|
|
30
|
+
if origin is type(None):
|
|
31
|
+
# Just None type
|
|
32
|
+
return pa.null()
|
|
33
|
+
|
|
34
|
+
# Optional[X] is Union[X, None]
|
|
35
|
+
args = get_args(type_)
|
|
36
|
+
if args:
|
|
37
|
+
# Check if this is Optional (Union with None)
|
|
38
|
+
non_none_types = [arg for arg in args if arg is not type(None)]
|
|
39
|
+
if len(non_none_types) < len(args): # Had None in the union
|
|
40
|
+
if non_none_types:
|
|
41
|
+
return BaseModelRow.pydantic_type_to_pyarrow(non_none_types[0])
|
|
42
|
+
return pa.null()
|
|
43
|
+
|
|
44
|
+
# Handle simple types - check if it's actually a class first
|
|
45
|
+
if not isinstance(type_, type):
|
|
46
|
+
# If it's not a type/class, default to string
|
|
47
|
+
return pa.string()
|
|
48
|
+
|
|
19
49
|
if issubclass(type_, bool):
|
|
20
50
|
return pa.bool_()
|
|
21
51
|
elif issubclass(type_, int):
|
|
@@ -32,28 +62,60 @@ class BaseModelRow(BaseModel):
|
|
|
32
62
|
else:
|
|
33
63
|
raise ValueError(f"No mapping for type {type_}")
|
|
34
64
|
|
|
65
|
+
@staticmethod
|
|
66
|
+
def string_to_pyarrow_type(type_string: str) -> pa.DataType:
|
|
67
|
+
"""Convert string representation back to pyarrow type by converting to Python type first."""
|
|
68
|
+
# Mapping of pyarrow string representations to Python types
|
|
69
|
+
type_mapping = {
|
|
70
|
+
"string": str,
|
|
71
|
+
"int64": int,
|
|
72
|
+
"float64": float,
|
|
73
|
+
"bool": bool,
|
|
74
|
+
"timestamp[ns]": datetime.datetime,
|
|
75
|
+
"date32[day]": datetime.date,
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
python_type = type_mapping.get(
|
|
79
|
+
type_string, str
|
|
80
|
+
) # Default to str for unknown types
|
|
81
|
+
return BaseModelRow.pydantic_type_to_pyarrow(python_type)
|
|
82
|
+
|
|
83
|
+
@staticmethod
|
|
84
|
+
def _is_optional_type(type_: Any) -> bool:
|
|
85
|
+
"""Check if a type annotation is Optional (i.e., Union with None)."""
|
|
86
|
+
origin = get_origin(type_)
|
|
87
|
+
if origin is type(None):
|
|
88
|
+
return True
|
|
89
|
+
|
|
90
|
+
args = get_args(type_)
|
|
91
|
+
if args:
|
|
92
|
+
# Check if None is in the Union args (Optional is Union[X, None])
|
|
93
|
+
return type(None) in args
|
|
94
|
+
|
|
95
|
+
return False
|
|
96
|
+
|
|
35
97
|
@classmethod
|
|
36
98
|
def arrow_schema(cls) -> pa.Schema:
|
|
37
99
|
fields = []
|
|
38
|
-
for field_name, field_model in cls.
|
|
39
|
-
pyarrow_type = BaseModelRow.pydantic_type_to_pyarrow(
|
|
40
|
-
field_model.outer_type_
|
|
41
|
-
)
|
|
100
|
+
for field_name, field_model in cls.model_fields.items():
|
|
101
|
+
pyarrow_type = BaseModelRow.pydantic_type_to_pyarrow(field_model.annotation)
|
|
42
102
|
pyarrow_field = pa.field(field_name, pyarrow_type)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
103
|
+
|
|
104
|
+
# Check if the type is Optional (Union with None) OR if the field has a default value
|
|
105
|
+
is_nullable = (
|
|
106
|
+
BaseModelRow._is_optional_type(field_model.annotation)
|
|
107
|
+
or not field_model.is_required()
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
pyarrow_field = pyarrow_field.with_nullable(is_nullable)
|
|
47
111
|
fields.append(pyarrow_field)
|
|
48
112
|
return pa.schema(fields)
|
|
49
113
|
|
|
50
114
|
@classmethod
|
|
51
115
|
def datahub_schema(cls) -> List[SchemaField]:
|
|
52
116
|
fields = []
|
|
53
|
-
for field_name, field_model in cls.
|
|
54
|
-
pyarrow_type = BaseModelRow.pydantic_type_to_pyarrow(
|
|
55
|
-
field_model.outer_type_
|
|
56
|
-
)
|
|
117
|
+
for field_name, field_model in cls.model_fields.items():
|
|
118
|
+
pyarrow_type = BaseModelRow.pydantic_type_to_pyarrow(field_model.annotation)
|
|
57
119
|
fields.append(SchemaField(name=field_name, type=str(pyarrow_type)))
|
|
58
120
|
return fields
|
|
59
121
|
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Any, Dict, List, Type, TypeVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
T = TypeVar("T", bound=BaseModel)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def parse_extra_properties_for_model(
|
|
13
|
+
extra_properties: List[Dict[str, str]], model_class: Type[T]
|
|
14
|
+
) -> Dict[str, Any]:
|
|
15
|
+
"""
|
|
16
|
+
Parse GraphQL extraProperties into a dictionary suitable for Pydantic model instantiation.
|
|
17
|
+
|
|
18
|
+
GraphQL returns extra properties where values are JSON-encoded strings. This function:
|
|
19
|
+
1. Filters to only include fields defined in the model
|
|
20
|
+
2. Parses JSON-encoded values back to Python objects
|
|
21
|
+
3. Handles parsing errors gracefully with logging
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
extra_properties: List of dicts with 'name' and 'value' keys from GraphQL response
|
|
25
|
+
model_class: Pydantic model class to parse properties for
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
Dictionary mapping field names to parsed values, ready for model instantiation
|
|
29
|
+
|
|
30
|
+
Example:
|
|
31
|
+
extra_props = [
|
|
32
|
+
{"name": "urn", "value": '"urn:li:dataset:..."'},
|
|
33
|
+
{"name": "owners", "value": '["urn:li:corpuser:user1"]'},
|
|
34
|
+
{"name": "scrollId", "value": "eyJzb3J0Ijp..."} # Not in model, will be skipped
|
|
35
|
+
]
|
|
36
|
+
parsed = parse_extra_properties_for_model(extra_props, MyModel)
|
|
37
|
+
instance = MyModel(**parsed)
|
|
38
|
+
"""
|
|
39
|
+
# Only parse fields that are defined in the model.
|
|
40
|
+
# Extra fields like scrollId are ignored.
|
|
41
|
+
model_fields = model_class.model_fields.keys()
|
|
42
|
+
extra_properties_map: Dict[str, Any] = {}
|
|
43
|
+
|
|
44
|
+
for prop in extra_properties:
|
|
45
|
+
name = prop["name"]
|
|
46
|
+
# Skip fields that aren't in the model (e.g., scrollId)
|
|
47
|
+
if name not in model_fields:
|
|
48
|
+
continue
|
|
49
|
+
|
|
50
|
+
# All model fields should be JSON-encoded, parse them
|
|
51
|
+
value = prop["value"]
|
|
52
|
+
try:
|
|
53
|
+
extra_properties_map[name] = json.loads(value)
|
|
54
|
+
except json.JSONDecodeError as e:
|
|
55
|
+
# This indicates a data contract violation - the field should be JSON-encoded
|
|
56
|
+
logger.warning(
|
|
57
|
+
f"Failed to parse field '{name}' as JSON. "
|
|
58
|
+
f"Value: '{value[:100]}...'. Error: {e}. "
|
|
59
|
+
f"Skipping field and using default value."
|
|
60
|
+
)
|
|
61
|
+
# Skip this field - Pydantic will use the default value if available,
|
|
62
|
+
# or fail validation if it's a required field
|
|
63
|
+
|
|
64
|
+
return extra_properties_map
|