acryl-datahub-cloud 0.3.8.3__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
- acryl_datahub_cloud/action_request/__init__.py +0 -0
- acryl_datahub_cloud/action_request/action_request_owner_source.py +174 -0
- acryl_datahub_cloud/api/__init__.py +1 -1
- acryl_datahub_cloud/api/client.py +2 -2
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +6 -6
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +69 -35
- acryl_datahub_cloud/datahub_reporting/extract_sql.py +4 -4
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_patch_builder.py +21 -21
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +14 -13
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +1130 -484
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionrequest/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/dataplatforminstance/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/dataset/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executorglobalconfig/__init__.py +15 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executorpool/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metric/__init__.py +29 -0
- acryl_datahub_cloud/metadata/schema.avsc +839 -49
- acryl_datahub_cloud/metadata/schema_classes.py +1286 -63
- acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +422 -12
- acryl_datahub_cloud/metadata/schemas/ActionRequestStatus.avsc +12 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +5 -3
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +5 -3
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +5 -3
- acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- acryl_datahub_cloud/metadata/schemas/BusinessAttributes.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/DataHubActionInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/DataHubConnectionKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceInfo.avsc +9 -4
- acryl_datahub_cloud/metadata/schemas/DataHubMetricCubeDefinition.avsc +185 -0
- acryl_datahub_cloud/metadata/schemas/DataHubMetricCubeEvent.avsc +184 -0
- acryl_datahub_cloud/metadata/schemas/DataHubMetricCubeKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +4 -4
- acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataProcessInstanceInput.avsc +132 -2
- acryl_datahub_cloud/metadata/schemas/DataProcessInstanceOutput.avsc +131 -1
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +14 -13
- acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestInput.avsc +6 -1
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestSignal.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTerms.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/IcebergCatalogInfo.avsc +28 -0
- acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +96 -0
- acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/InputFields.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +399 -176
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +6 -4
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/RemoteExecutorGlobalConfigKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/RemoteExecutorPoolGlobalConfig.avsc +16 -0
- acryl_datahub_cloud/metadata/schemas/RemoteExecutorPoolInfo.avsc +85 -0
- acryl_datahub_cloud/metadata/schemas/RemoteExecutorPoolKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/RemoteExecutorStatus.avsc +5 -5
- acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +2 -2
- acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/VersionProperties.avsc +18 -0
- acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc +5 -0
- {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/METADATA +42 -42
- {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/RECORD +78 -68
- {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/entry_points.txt +1 -0
- acryl_datahub_cloud/api/entity_versioning.py +0 -167
- {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/top_level.txt +0 -0
|
@@ -662,7 +662,7 @@ def generate_markdown_with_logo(
|
|
|
662
662
|
logo_url = get_platform_logo_url(platform)
|
|
663
663
|
formatted_date = date.strftime("%B %d, %Y")
|
|
664
664
|
if logo_url:
|
|
665
|
-
markdown = f'<img src="{logo_url}" alt="Logo" width="{logo_size}" height="{logo_size}" style="vertical-align: middle;"> by {author.name} on {formatted_date} {""if is_public else "(Internal)"}'
|
|
665
|
+
markdown = f'<img src="{logo_url}" alt="Logo" width="{logo_size}" height="{logo_size}" style="vertical-align: middle;"> by {author.name} on {formatted_date} {"" if is_public else "(Internal)"}'
|
|
666
666
|
else:
|
|
667
667
|
markdown = f":{platform}: by {author.name} on {formatted_date} {'' if is_public else '(Internal)'}"
|
|
668
668
|
return markdown
|
|
File without changes
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, Iterable, List, Optional
|
|
3
|
+
|
|
4
|
+
from datahub.configuration import ConfigModel
|
|
5
|
+
from datahub.ingestion.api.common import PipelineContext
|
|
6
|
+
from datahub.ingestion.api.decorators import (
|
|
7
|
+
SupportStatus,
|
|
8
|
+
config_class,
|
|
9
|
+
platform_name,
|
|
10
|
+
support_status,
|
|
11
|
+
)
|
|
12
|
+
from datahub.ingestion.api.source import Source, SourceReport
|
|
13
|
+
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
14
|
+
from datahub.metadata.schema_classes import (
|
|
15
|
+
ActionRequestInfoClass,
|
|
16
|
+
)
|
|
17
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
18
|
+
from datahub.utilities.urns.urn import guess_entity_type
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ActionRequestOwnerSourceConfig(ConfigModel):
    """Configuration accepted by the action-request owner source."""

    # Sent as the `count` of each `listActionRequests` page and used as the
    # paging step between successive pages.
    batch_size: int = 20
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ActionRequestOwnerSourceReport(SourceReport):
    """Counters collected while checking the assignees of action requests."""

    # `total` value reported by the most recent listActionRequests page.
    total_requests: int = 0
    # Action requests handed to the per-request processing step.
    processed_proposals: int = 0
    # Requests for which getActionRequestAssignee returned nothing.
    correct_assignees_not_found: int = 0
    # Requests whose stored assignees already matched the expected ones.
    correct_proposal_owners: int = 0
    # Intended to count requests whose stored assignees needed correcting.
    incorrect_proposal_owners: int = 0
    # Requests whose actionRequestInfo aspect could not be fetched.
    action_request_info_not_found: int = 0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
ACTION_REQUESTS = """
|
|
38
|
+
query listActionRequests($input: ListActionRequestsInput!) {
|
|
39
|
+
listActionRequests(input: $input) {
|
|
40
|
+
start
|
|
41
|
+
count
|
|
42
|
+
total
|
|
43
|
+
actionRequests {
|
|
44
|
+
urn
|
|
45
|
+
type
|
|
46
|
+
entity {
|
|
47
|
+
urn
|
|
48
|
+
}
|
|
49
|
+
subResource
|
|
50
|
+
subResourceType
|
|
51
|
+
assignedUsers
|
|
52
|
+
assignedGroups
|
|
53
|
+
assignedRoles
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
ACTION_REQUEST_ASSIGNEES = """
|
|
60
|
+
query getActionRequestAssignee($input: GetActionRequestAssigneeInput!) {
|
|
61
|
+
getActionRequestAssignee(input: $input)
|
|
62
|
+
}
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@platform_name(id="datahub", platform_name="DataHub")
@config_class(ActionRequestOwnerSourceConfig)
@support_status(SupportStatus.INCUBATING)
class ActionRequestOwnerSource(Source):
    """Source that re-checks the assignees of pending action requests.

    Every pending action request returned by ``listActionRequests`` is
    compared against the assignees returned by ``getActionRequestAssignee``.
    When the stored users, groups or roles differ, the request's
    ``actionRequestInfo`` aspect is re-emitted with the expected assignees.
    """

    def __init__(self, config: ActionRequestOwnerSourceConfig, ctx: PipelineContext):
        super().__init__(ctx)
        self.config: ActionRequestOwnerSourceConfig = config
        self.report = ActionRequestOwnerSourceReport()
        self.graph = ctx.require_graph("Proposal Owner source")
        self.event_not_produced_warn = False

    @staticmethod
    def _same_assignees(current: Optional[List[str]], expected: List[str]) -> bool:
        """Return True when both lists hold the same URNs, ignoring order.

        A missing (``None``) current list is treated as empty so that it
        compares equal to an empty expected list.
        """
        return sorted(current or []) == sorted(expected)

    def _process_action_request(
        self, action_request: Dict
    ) -> Optional[MetadataChangeProposalWrapper]:
        """Build the corrective proposal for one action request, if needed.

        Args:
            action_request: One ``actionRequests`` entry of the
                ``listActionRequests`` GraphQL response.

        Returns:
            A proposal carrying the ``actionRequestInfo`` aspect with the
            expected assignees, or ``None`` when nothing has to be written
            (assignees already match, or required data could not be found).
        """
        self.report.processed_proposals += 1
        action_request_urn = action_request.get("urn")
        action_request_entity = action_request.get("entity")
        if action_request_entity is None:
            # A single malformed record is logged and skipped, like the other
            # "not found" cases below, instead of aborting the whole run.
            logger.error(
                f"Entity not found for action request {action_request_urn}"
            )
            return None

        correct_assignees = self.graph.execute_graphql(
            query=ACTION_REQUEST_ASSIGNEES,
            variables={
                "input": {
                    "resourceUrn": action_request_entity.get("urn"),
                    "actionRequestType": action_request.get("type"),
                    "subResource": action_request.get("subResource"),
                    "subResourceType": action_request.get("subResourceType"),
                }
            },
        ).get("getActionRequestAssignee")
        if correct_assignees is None:
            self.report.correct_assignees_not_found += 1
            logger.error(
                f"Correct assignees not found for action request {action_request_urn}"
            )
            return None

        # Classify every assignee URN once; URNs of any other entity type are
        # ignored, as before.
        correct_users: List[str] = []
        correct_groups: List[str] = []
        correct_roles: List[str] = []
        buckets = {
            "corpuser": correct_users,
            "corpGroup": correct_groups,
            "dataHubRole": correct_roles,
        }
        for assignee in correct_assignees:
            bucket = buckets.get(guess_entity_type(assignee))
            if bucket is not None:
                bucket.append(assignee)

        # Assignees are compared without regard to order so that a reordered
        # but otherwise identical response does not trigger a rewrite.
        if (
            self._same_assignees(action_request.get("assignedUsers"), correct_users)
            and self._same_assignees(
                action_request.get("assignedGroups"), correct_groups
            )
            and self._same_assignees(action_request.get("assignedRoles"), correct_roles)
        ):
            self.report.correct_proposal_owners += 1
            return None

        action_request_info = self.graph.get_aspect_v2(
            entity_urn=str(action_request_urn),
            aspect="actionRequestInfo",
            aspect_type=ActionRequestInfoClass,
        )
        if action_request_info is None:
            self.report.action_request_info_not_found += 1
            logger.error(
                f"Action request info not found for action request {action_request_urn}"
            )
            return None

        # Counted only when a correction is actually produced, so the report
        # counters stay disjoint.
        self.report.incorrect_proposal_owners += 1
        action_request_info.assignedUsers = correct_users
        action_request_info.assignedGroups = correct_groups
        action_request_info.assignedRoles = correct_roles
        return MetadataChangeProposalWrapper(
            entityUrn=action_request_urn, aspect=action_request_info
        )

    def _get_action_requests(self, start: int) -> List:
        """Fetch one page of pending action requests.

        Args:
            start: Offset of the first request of the page.

        Returns:
            The ``actionRequests`` list of the page; empty when the response
            carries none.
        """
        response = self.graph.execute_graphql(
            query=ACTION_REQUESTS,
            variables={
                "input": {
                    "status": "PENDING",
                    "allActionRequests": True,
                    "start": start,
                    "count": self.config.batch_size,
                }
            },
        )
        # Invariants of the GraphQL response rather than per-record data.
        assert response is not None
        page = response.get("listActionRequests")
        assert page is not None
        self.report.total_requests = page.get("total", 0)
        # `or []` also covers a response that carries an explicit null.
        return page.get("actionRequests", []) or []

    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        """Yield a work unit for every action request that needs correcting.

        Pages through the pending action requests in steps of
        ``config.batch_size`` until an empty page is returned.
        """
        start = 0
        while True:
            action_requests = self._get_action_requests(start)
            if not action_requests:
                break
            for action_request in action_requests:
                result = self._process_action_request(action_request)
                if result is not None:
                    yield result.as_workunit()
            start += self.config.batch_size

    def get_report(self) -> SourceReport:
        """Return the report holding this source's counters."""
        return self.report
|
|
@@ -1 +1 @@
|
|
|
1
|
-
from acryl_datahub_cloud.api.client import AcrylGraph
|
|
1
|
+
from acryl_datahub_cloud.api.client import AcrylGraph as AcrylGraph
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
from acryl_datahub_cloud.api.entity_versioning import EntityVersioningAPI
|
|
2
1
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
3
2
|
|
|
4
3
|
|
|
5
|
-
|
|
4
|
+
# Add other graph mixins here when applicable
|
|
5
|
+
class AcrylGraph(DataHubGraph):
    """Graph client for acryl_datahub_cloud; adds nothing to DataHubGraph yet."""
|
|
@@ -66,9 +66,9 @@ class DatasetRegistrationSpec(BaseModel):
|
|
|
66
66
|
|
|
67
67
|
class FileStoreBackedDatasetConfig(ConfigModel):
|
|
68
68
|
dataset_name: str
|
|
69
|
-
dataset_urn: Optional[
|
|
70
|
-
|
|
71
|
-
|
|
69
|
+
dataset_urn: Optional[str] = (
|
|
70
|
+
None # If not set, it will be generated from the dataset_name
|
|
71
|
+
)
|
|
72
72
|
bucket_prefix: str
|
|
73
73
|
store_platform: str = "s3"
|
|
74
74
|
file_name: str = "data"
|
|
@@ -79,9 +79,9 @@ class FileStoreBackedDatasetConfig(ConfigModel):
|
|
|
79
79
|
generate_presigned_url: bool = True
|
|
80
80
|
presigned_url_expiry_days: int = 7
|
|
81
81
|
dataset_registration_spec: DatasetRegistrationSpec = DatasetRegistrationSpec()
|
|
82
|
-
file: Optional[
|
|
83
|
-
|
|
84
|
-
|
|
82
|
+
file: Optional[str] = (
|
|
83
|
+
None # This is the file to be registered. When set, the file will be registered as a dataset immediately
|
|
84
|
+
)
|
|
85
85
|
|
|
86
86
|
datahub_platform: str = "acryl"
|
|
87
87
|
|
|
@@ -10,11 +10,13 @@ from pydantic import BaseModel
|
|
|
10
10
|
from acryl_datahub_cloud.elasticsearch.graph_service import BaseModelRow
|
|
11
11
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
12
12
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
13
|
-
from datahub.ingestion.graph.filters import
|
|
13
|
+
from datahub.ingestion.graph.filters import RawSearchFilterRule
|
|
14
14
|
from datahub.metadata.schema_classes import (
|
|
15
15
|
DomainPropertiesClass,
|
|
16
|
+
FormAssociationClass,
|
|
16
17
|
FormInfoClass,
|
|
17
18
|
FormsClass,
|
|
19
|
+
FormStateClass,
|
|
18
20
|
FormTypeClass,
|
|
19
21
|
)
|
|
20
22
|
|
|
@@ -40,7 +42,7 @@ class FormType(str, Enum):
|
|
|
40
42
|
class FormReportingRow(BaseModelRow):
|
|
41
43
|
form_urn: str
|
|
42
44
|
form_type: FormType
|
|
43
|
-
form_assigned_date: date
|
|
45
|
+
form_assigned_date: Optional[date]
|
|
44
46
|
form_completed_date: Optional[date]
|
|
45
47
|
form_status: FormStatus
|
|
46
48
|
question_id: str
|
|
@@ -147,7 +149,7 @@ class DataHubFormReportingData(FormData):
|
|
|
147
149
|
)
|
|
148
150
|
)
|
|
149
151
|
|
|
150
|
-
def get_form_existence_or_filters(self) -> List[
|
|
152
|
+
def get_form_existence_or_filters(self) -> List[RawSearchFilterRule]:
|
|
151
153
|
"""
|
|
152
154
|
Datasets must either have completedForms or incompleteForms assigned to
|
|
153
155
|
them
|
|
@@ -177,24 +179,64 @@ class DataHubFormReportingData(FormData):
|
|
|
177
179
|
},
|
|
178
180
|
]
|
|
179
181
|
|
|
182
|
+
def is_published(self, form_urn: str) -> bool:
    """Return True when the form is known to the registry and is PUBLISHED.

    A form that the registry cannot resolve is reported as not published.
    """
    info = self.form_registry.get_form(form_urn)
    if not info:
        return False
    return info.status.state == FormStateClass.PUBLISHED
|
|
187
|
+
|
|
188
|
+
def form_published_time(self, form_urn: str) -> float:
    """Return the form status' last-modified time divided by 1000.

    Returns 0 when the form is unknown to the registry, is not in the
    PUBLISHED state, or has no last-modified stamp on its status.
    """
    info = self.form_registry.get_form(form_urn)
    if not info:
        return 0
    status = info.status
    published = status.state == FormStateClass.PUBLISHED
    if not (status.lastModified and published):
        return 0
    return status.lastModified.time / 1000
|
|
198
|
+
|
|
199
|
+
def assigned_to_asset_time(self, form_association: FormAssociationClass) -> float:
    """Return the association's creation time divided by 1000, or 0 if unset."""
    created = form_association.created
    if not created:
        return 0
    return created.time / 1000
|
|
201
|
+
|
|
202
|
+
def assignment_time(self, form_association: FormAssociationClass) -> float:
    """Return the later of the form's published time and its assignment time."""
    return max(
        self.form_published_time(form_association.urn),
        self.assigned_to_asset_time(form_association),
    )
|
|
206
|
+
|
|
207
|
+
# For a given asset, the assigned date is the more recent of the published date and the time this was actually assigned.
|
|
208
|
+
# Assets can be assigned before publishing, but we don't want to show that date because it's not open to the public yet.
|
|
209
|
+
# Assets can also be assigned after publishing, so we should show that date for those assets.
|
|
180
210
|
def form_assigned_date(
    self, search_row: DataHubDatasetSearchRow
) -> Dict[str, Optional[date]]:
    """Map each form attached to the asset to its assigned date.

    The date is derived (in UTC) from ``assignment_time``. The value is
    ``None`` when the form is not published or no assignment time is known.
    An asset without a Forms aspect yields an empty mapping.
    """
    forms = self.graph.get_aspect(search_row.urn, FormsClass)
    if not forms:
        return {}

    assigned_dates: Dict[str, Optional[date]] = {}
    # Incomplete forms first, then completed ones, so a URN present in both
    # lists ends up with the value computed for the completed association.
    for association in (*forms.incompleteForms, *forms.completedForms):
        published = self.is_published(association.urn)
        assigned_at = self.assignment_time(association)
        if published and assigned_at != 0:
            assigned_dates[association.urn] = datetime.fromtimestamp(
                assigned_at,
                tz=timezone.utc,
            ).date()
        else:
            assigned_dates[association.urn] = None
    return assigned_dates
|
|
199
241
|
|
|
200
242
|
def form_completed_date(
|
|
@@ -206,7 +248,9 @@ class DataHubFormReportingData(FormData):
|
|
|
206
248
|
form_completion_dates = {}
|
|
207
249
|
for form in search_row.completedForms:
|
|
208
250
|
form_info = self.form_registry.get_form(form)
|
|
209
|
-
|
|
251
|
+
if not form_info:
|
|
252
|
+
logger.warning(f"Found form attached that does not exist: {form}")
|
|
253
|
+
continue
|
|
210
254
|
form_prompts = [x.id for x in form_info.prompts]
|
|
211
255
|
completed_prompts_map = {
|
|
212
256
|
prompt_id: response_time
|
|
@@ -291,7 +335,11 @@ class DataHubFormReportingData(FormData):
|
|
|
291
335
|
on_form_scanned(form_id)
|
|
292
336
|
forms_scanned.add(form_id)
|
|
293
337
|
form_info = self.form_registry.get_form(form_id)
|
|
294
|
-
|
|
338
|
+
if not form_info:
|
|
339
|
+
logger.warning(
|
|
340
|
+
f"Found form attached that does not exist: {form_id}"
|
|
341
|
+
)
|
|
342
|
+
continue
|
|
295
343
|
form_prompts = [x.id for x in form_info.prompts]
|
|
296
344
|
form_incomplete_prompts = [
|
|
297
345
|
p
|
|
@@ -315,11 +363,6 @@ class DataHubFormReportingData(FormData):
|
|
|
315
363
|
if p in form_prompts
|
|
316
364
|
]:
|
|
317
365
|
for owner in assignees:
|
|
318
|
-
if form_id not in form_assigned_dates:
|
|
319
|
-
logger.warning(
|
|
320
|
-
f"Form {form_id} not found in form_assigned_dates"
|
|
321
|
-
)
|
|
322
|
-
continue
|
|
323
366
|
yield FormReportingRow(
|
|
324
367
|
form_urn=form_id,
|
|
325
368
|
form_assigned_date=form_assigned_dates[form_id],
|
|
@@ -349,11 +392,6 @@ class DataHubFormReportingData(FormData):
|
|
|
349
392
|
if p in form_prompts
|
|
350
393
|
]:
|
|
351
394
|
for owner in assignees:
|
|
352
|
-
if form_id not in form_assigned_dates:
|
|
353
|
-
logger.warning(
|
|
354
|
-
f"Form {form_id} not found in form_assigned_dates"
|
|
355
|
-
)
|
|
356
|
-
continue
|
|
357
395
|
yield FormReportingRow(
|
|
358
396
|
form_urn=form_id,
|
|
359
397
|
form_assigned_date=form_assigned_dates[form_id],
|
|
@@ -393,7 +431,11 @@ class DataHubFormReportingData(FormData):
|
|
|
393
431
|
on_form_scanned(form_id)
|
|
394
432
|
forms_scanned.add(form_id)
|
|
395
433
|
form_info = self.form_registry.get_form(form_id)
|
|
396
|
-
|
|
434
|
+
if not form_info:
|
|
435
|
+
logger.warning(
|
|
436
|
+
f"Found form attached that does not exist: {form_id}"
|
|
437
|
+
)
|
|
438
|
+
continue
|
|
397
439
|
form_type = (
|
|
398
440
|
FormType.DOCUMENTATION
|
|
399
441
|
if form_info.type == FormTypeClass.COMPLETION
|
|
@@ -411,11 +453,8 @@ class DataHubFormReportingData(FormData):
|
|
|
411
453
|
for prompt_id in [
|
|
412
454
|
p
|
|
413
455
|
for p in search_row.completedFormsIncompletePromptIds
|
|
414
|
-
|
|
456
|
+
if p in form_prompts
|
|
415
457
|
]:
|
|
416
|
-
logger.warning(
|
|
417
|
-
f"Unexpected incomplete prompt {prompt_id} in completed form {form_id}"
|
|
418
|
-
)
|
|
419
458
|
for owner in assignees:
|
|
420
459
|
yield FormReportingRow(
|
|
421
460
|
form_urn=form_id,
|
|
@@ -450,11 +489,6 @@ class DataHubFormReportingData(FormData):
|
|
|
450
489
|
if p in form_prompts
|
|
451
490
|
]:
|
|
452
491
|
for owner in assignees:
|
|
453
|
-
if form_id not in form_assigned_dates:
|
|
454
|
-
logger.warning(
|
|
455
|
-
f"Form {form_id} not found in form_assigned_dates"
|
|
456
|
-
)
|
|
457
|
-
continue
|
|
458
492
|
yield FormReportingRow(
|
|
459
493
|
form_urn=form_id,
|
|
460
494
|
form_assigned_date=form_assigned_dates[form_id],
|
|
@@ -54,9 +54,9 @@ class DataHubReportingExtractSQLSourceConfig(ConfigModel):
|
|
|
54
54
|
|
|
55
55
|
if "file" not in v:
|
|
56
56
|
default_config = FileStoreBackedDatasetConfig.dummy()
|
|
57
|
-
v[
|
|
58
|
-
"
|
|
59
|
-
|
|
57
|
+
v["file"] = (
|
|
58
|
+
f"{default_config.file_name}.{default_config.file_extension}"
|
|
59
|
+
)
|
|
60
60
|
else:
|
|
61
61
|
v["file_name"] = v["file"].split(".")[0]
|
|
62
62
|
v["file_extension"] = v["file"].split(".")[-1]
|
|
@@ -133,7 +133,7 @@ class DataHubReportingExtractSQLSource(Source):
|
|
|
133
133
|
tmp_dir_aux = (
|
|
134
134
|
self.ctx.pipeline_name if self.ctx.pipeline_name else "sql_default_dir"
|
|
135
135
|
)
|
|
136
|
-
tmp_dir = f
|
|
136
|
+
tmp_dir = f"/tmp/{tmp_dir_aux.replace(':', '_')}"
|
|
137
137
|
|
|
138
138
|
output_file = (
|
|
139
139
|
self.datahub_based_s3_dataset.config.file
|
|
@@ -37,7 +37,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
37
37
|
self._add_patch(
|
|
38
38
|
UsageFeaturesClass.ASPECT_NAME,
|
|
39
39
|
"add",
|
|
40
|
-
path="
|
|
40
|
+
path=("queryCountLast30Days",),
|
|
41
41
|
value=count,
|
|
42
42
|
)
|
|
43
43
|
return self
|
|
@@ -56,7 +56,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
56
56
|
self._add_patch(
|
|
57
57
|
UsageFeaturesClass.ASPECT_NAME,
|
|
58
58
|
"add",
|
|
59
|
-
path="
|
|
59
|
+
path=("viewCountLast30Days",),
|
|
60
60
|
value=count,
|
|
61
61
|
)
|
|
62
62
|
return self
|
|
@@ -73,7 +73,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
73
73
|
self._add_patch(
|
|
74
74
|
UsageFeaturesClass.ASPECT_NAME,
|
|
75
75
|
"add",
|
|
76
|
-
path="
|
|
76
|
+
path=("viewCountTotal",),
|
|
77
77
|
value=count,
|
|
78
78
|
)
|
|
79
79
|
return self
|
|
@@ -92,7 +92,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
92
92
|
self._add_patch(
|
|
93
93
|
UsageFeaturesClass.ASPECT_NAME,
|
|
94
94
|
"add",
|
|
95
|
-
path="
|
|
95
|
+
path=("viewCountPercentileLast30Days",),
|
|
96
96
|
value=count,
|
|
97
97
|
)
|
|
98
98
|
return self
|
|
@@ -111,7 +111,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
111
111
|
self._add_patch(
|
|
112
112
|
UsageFeaturesClass.ASPECT_NAME,
|
|
113
113
|
"add",
|
|
114
|
-
path="
|
|
114
|
+
path=("usageCountLast30Days",),
|
|
115
115
|
value=count,
|
|
116
116
|
)
|
|
117
117
|
return self
|
|
@@ -130,7 +130,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
130
130
|
self._add_patch(
|
|
131
131
|
UsageFeaturesClass.ASPECT_NAME,
|
|
132
132
|
"add",
|
|
133
|
-
path="
|
|
133
|
+
path=("uniqueUserCountLast30Days",),
|
|
134
134
|
value=count,
|
|
135
135
|
)
|
|
136
136
|
return self
|
|
@@ -149,7 +149,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
149
149
|
self._add_patch(
|
|
150
150
|
UsageFeaturesClass.ASPECT_NAME,
|
|
151
151
|
"add",
|
|
152
|
-
path="
|
|
152
|
+
path=("writeCountLast30Days",),
|
|
153
153
|
value=count,
|
|
154
154
|
)
|
|
155
155
|
return self
|
|
@@ -168,7 +168,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
168
168
|
self._add_patch(
|
|
169
169
|
UsageFeaturesClass.ASPECT_NAME,
|
|
170
170
|
"add",
|
|
171
|
-
path="
|
|
171
|
+
path=("queryCountPercentileLast30Days",),
|
|
172
172
|
value=count,
|
|
173
173
|
)
|
|
174
174
|
return self
|
|
@@ -187,7 +187,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
187
187
|
self._add_patch(
|
|
188
188
|
UsageFeaturesClass.ASPECT_NAME,
|
|
189
189
|
"add",
|
|
190
|
-
path="
|
|
190
|
+
path=("queryCountRankLast30Days",),
|
|
191
191
|
value=count,
|
|
192
192
|
)
|
|
193
193
|
return self
|
|
@@ -206,7 +206,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
206
206
|
self._add_patch(
|
|
207
207
|
UsageFeaturesClass.ASPECT_NAME,
|
|
208
208
|
"add",
|
|
209
|
-
path="
|
|
209
|
+
path=("uniqueUserPercentileLast30Days",),
|
|
210
210
|
value=count,
|
|
211
211
|
)
|
|
212
212
|
return self
|
|
@@ -225,7 +225,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
225
225
|
self._add_patch(
|
|
226
226
|
UsageFeaturesClass.ASPECT_NAME,
|
|
227
227
|
"add",
|
|
228
|
-
path="
|
|
228
|
+
path=("uniqueUserRankLast30Days",),
|
|
229
229
|
value=count,
|
|
230
230
|
)
|
|
231
231
|
return self
|
|
@@ -244,7 +244,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
244
244
|
self._add_patch(
|
|
245
245
|
UsageFeaturesClass.ASPECT_NAME,
|
|
246
246
|
"add",
|
|
247
|
-
path="
|
|
247
|
+
path=("writeCountPercentileLast30Days",),
|
|
248
248
|
value=count,
|
|
249
249
|
)
|
|
250
250
|
return self
|
|
@@ -263,7 +263,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
263
263
|
self._add_patch(
|
|
264
264
|
UsageFeaturesClass.ASPECT_NAME,
|
|
265
265
|
"add",
|
|
266
|
-
path="
|
|
266
|
+
path=("writeCountRankLast30Days",),
|
|
267
267
|
value=count,
|
|
268
268
|
)
|
|
269
269
|
return self
|
|
@@ -282,7 +282,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
282
282
|
self._add_patch(
|
|
283
283
|
UsageFeaturesClass.ASPECT_NAME,
|
|
284
284
|
"add",
|
|
285
|
-
path="
|
|
285
|
+
path=("topUsersLast30Days",),
|
|
286
286
|
value=users,
|
|
287
287
|
)
|
|
288
288
|
return self
|
|
@@ -301,7 +301,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
301
301
|
self._add_patch(
|
|
302
302
|
UsageFeaturesClass.ASPECT_NAME,
|
|
303
303
|
"add",
|
|
304
|
-
path="
|
|
304
|
+
path=("sizeInBytesPercentile",),
|
|
305
305
|
value=percentile,
|
|
306
306
|
)
|
|
307
307
|
return self
|
|
@@ -318,7 +318,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
318
318
|
self._add_patch(
|
|
319
319
|
UsageFeaturesClass.ASPECT_NAME,
|
|
320
320
|
"add",
|
|
321
|
-
path="
|
|
321
|
+
path=("sizeInBytesRank",),
|
|
322
322
|
value=rank,
|
|
323
323
|
)
|
|
324
324
|
return self
|
|
@@ -337,7 +337,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
337
337
|
self._add_patch(
|
|
338
338
|
UsageFeaturesClass.ASPECT_NAME,
|
|
339
339
|
"add",
|
|
340
|
-
path="
|
|
340
|
+
path=("rowCountPercentile",),
|
|
341
341
|
value=percentile,
|
|
342
342
|
)
|
|
343
343
|
return self
|
|
@@ -356,7 +356,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
356
356
|
self._add_patch(
|
|
357
357
|
UsageFeaturesClass.ASPECT_NAME,
|
|
358
358
|
"add",
|
|
359
|
-
path="
|
|
359
|
+
path=("usageSearchScoreMultiplier",),
|
|
360
360
|
value=multiplier,
|
|
361
361
|
)
|
|
362
362
|
return self
|
|
@@ -375,7 +375,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
375
375
|
self._add_patch(
|
|
376
376
|
UsageFeaturesClass.ASPECT_NAME,
|
|
377
377
|
"add",
|
|
378
|
-
path="
|
|
378
|
+
path=("usageFreshnessScoreMultiplier",),
|
|
379
379
|
value=multiplier,
|
|
380
380
|
)
|
|
381
381
|
return self
|
|
@@ -394,7 +394,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
394
394
|
self._add_patch(
|
|
395
395
|
UsageFeaturesClass.ASPECT_NAME,
|
|
396
396
|
"add",
|
|
397
|
-
path="
|
|
397
|
+
path=("customDatahubScoreMultiplier",),
|
|
398
398
|
value=multiplier,
|
|
399
399
|
)
|
|
400
400
|
return self
|
|
@@ -413,7 +413,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
|
|
|
413
413
|
self._add_patch(
|
|
414
414
|
UsageFeaturesClass.ASPECT_NAME,
|
|
415
415
|
"add",
|
|
416
|
-
path="
|
|
416
|
+
path=("combinedSearchRankingMultiplier",),
|
|
417
417
|
value=multiplier,
|
|
418
418
|
)
|
|
419
419
|
return self
|