acryl-datahub-cloud 0.3.8.3__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (79) hide show
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
  3. acryl_datahub_cloud/action_request/__init__.py +0 -0
  4. acryl_datahub_cloud/action_request/action_request_owner_source.py +174 -0
  5. acryl_datahub_cloud/api/__init__.py +1 -1
  6. acryl_datahub_cloud/api/client.py +2 -2
  7. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +6 -6
  8. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +69 -35
  9. acryl_datahub_cloud/datahub_reporting/extract_sql.py +4 -4
  10. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_patch_builder.py +21 -21
  11. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +14 -13
  12. acryl_datahub_cloud/metadata/_urns/urn_defs.py +1130 -484
  13. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionrequest/__init__.py +6 -0
  14. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/dataplatforminstance/__init__.py +2 -0
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/dataset/__init__.py +2 -0
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executorglobalconfig/__init__.py +15 -0
  17. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/executorpool/__init__.py +4 -0
  18. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  19. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metric/__init__.py +29 -0
  20. acryl_datahub_cloud/metadata/schema.avsc +839 -49
  21. acryl_datahub_cloud/metadata/schema_classes.py +1286 -63
  22. acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +422 -12
  23. acryl_datahub_cloud/metadata/schemas/ActionRequestStatus.avsc +12 -0
  24. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +5 -3
  25. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +5 -3
  26. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +5 -3
  27. acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
  28. acryl_datahub_cloud/metadata/schemas/BusinessAttributes.avsc +6 -0
  29. acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +1 -0
  30. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +3 -3
  31. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -1
  32. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +1 -1
  33. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +3 -3
  34. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -1
  35. acryl_datahub_cloud/metadata/schemas/DataHubActionInfo.avsc +1 -1
  36. acryl_datahub_cloud/metadata/schemas/DataHubConnectionKey.avsc +2 -1
  37. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceInfo.avsc +9 -4
  38. acryl_datahub_cloud/metadata/schemas/DataHubMetricCubeDefinition.avsc +185 -0
  39. acryl_datahub_cloud/metadata/schemas/DataHubMetricCubeEvent.avsc +184 -0
  40. acryl_datahub_cloud/metadata/schemas/DataHubMetricCubeKey.avsc +22 -0
  41. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +4 -4
  42. acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceKey.avsc +2 -1
  43. acryl_datahub_cloud/metadata/schemas/DataProcessInstanceInput.avsc +132 -2
  44. acryl_datahub_cloud/metadata/schemas/DataProcessInstanceOutput.avsc +131 -1
  45. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +14 -13
  46. acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
  47. acryl_datahub_cloud/metadata/schemas/ExecutionRequestInput.avsc +6 -1
  48. acryl_datahub_cloud/metadata/schemas/ExecutionRequestSignal.avsc +1 -1
  49. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +5 -0
  50. acryl_datahub_cloud/metadata/schemas/GlossaryTerms.avsc +3 -1
  51. acryl_datahub_cloud/metadata/schemas/IcebergCatalogInfo.avsc +28 -0
  52. acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +96 -0
  53. acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +4 -1
  54. acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +4 -1
  55. acryl_datahub_cloud/metadata/schemas/InputFields.avsc +3 -1
  56. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -1
  57. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -1
  58. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -1
  59. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +3 -3
  60. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -1
  61. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +399 -176
  62. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +6 -4
  63. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -1
  64. acryl_datahub_cloud/metadata/schemas/Operation.avsc +4 -2
  65. acryl_datahub_cloud/metadata/schemas/RemoteExecutorGlobalConfigKey.avsc +21 -0
  66. acryl_datahub_cloud/metadata/schemas/RemoteExecutorPoolGlobalConfig.avsc +16 -0
  67. acryl_datahub_cloud/metadata/schemas/RemoteExecutorPoolInfo.avsc +85 -0
  68. acryl_datahub_cloud/metadata/schemas/RemoteExecutorPoolKey.avsc +1 -1
  69. acryl_datahub_cloud/metadata/schemas/RemoteExecutorStatus.avsc +5 -5
  70. acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +2 -2
  71. acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +3 -1
  72. acryl_datahub_cloud/metadata/schemas/VersionProperties.avsc +18 -0
  73. acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc +5 -0
  74. {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/METADATA +42 -42
  75. {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/RECORD +78 -68
  76. {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/entry_points.txt +1 -0
  77. acryl_datahub_cloud/api/entity_versioning.py +0 -167
  78. {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/WHEEL +0 -0
  79. {acryl_datahub_cloud-0.3.8.3.dist-info → acryl_datahub_cloud-0.3.9.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "acryl-datahub-cloud",
3
- "version": "0.3.8.3",
3
+ "version": "0.3.9",
4
4
  "install_requires": [
5
5
  "avro-gen3==0.7.16",
6
6
  "acryl-datahub"
@@ -662,7 +662,7 @@ def generate_markdown_with_logo(
662
662
  logo_url = get_platform_logo_url(platform)
663
663
  formatted_date = date.strftime("%B %d, %Y")
664
664
  if logo_url:
665
- markdown = f'<img src="{logo_url}" alt="Logo" width="{logo_size}" height="{logo_size}" style="vertical-align: middle;"> by {author.name} on {formatted_date} {""if is_public else "(Internal)"}'
665
+ markdown = f'<img src="{logo_url}" alt="Logo" width="{logo_size}" height="{logo_size}" style="vertical-align: middle;"> by {author.name} on {formatted_date} {"" if is_public else "(Internal)"}'
666
666
  else:
667
667
  markdown = f":{platform}: by {author.name} on {formatted_date} {'' if is_public else '(Internal)'}"
668
668
  return markdown
File without changes
@@ -0,0 +1,174 @@
1
+ import logging
2
+ from typing import Dict, Iterable, List, Optional
3
+
4
+ from datahub.configuration import ConfigModel
5
+ from datahub.ingestion.api.common import PipelineContext
6
+ from datahub.ingestion.api.decorators import (
7
+ SupportStatus,
8
+ config_class,
9
+ platform_name,
10
+ support_status,
11
+ )
12
+ from datahub.ingestion.api.source import Source, SourceReport
13
+ from datahub.ingestion.api.workunit import MetadataWorkUnit
14
+ from datahub.metadata.schema_classes import (
15
+ ActionRequestInfoClass,
16
+ )
17
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
18
+ from datahub.utilities.urns.urn import guess_entity_type
19
+
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
+ class ActionRequestOwnerSourceConfig(ConfigModel):
25
+ batch_size: int = 20
26
+
27
+
28
+ class ActionRequestOwnerSourceReport(SourceReport):
29
+ total_requests: int = 0
30
+ processed_proposals = 0
31
+ correct_assignees_not_found = 0
32
+ correct_proposal_owners = 0
33
+ incorrect_proposal_owners = 0
34
+ action_request_info_not_found = 0
35
+
36
+
37
+ ACTION_REQUESTS = """
38
+ query listActionRequests($input: ListActionRequestsInput!) {
39
+ listActionRequests(input: $input) {
40
+ start
41
+ count
42
+ total
43
+ actionRequests {
44
+ urn
45
+ type
46
+ entity {
47
+ urn
48
+ }
49
+ subResource
50
+ subResourceType
51
+ assignedUsers
52
+ assignedGroups
53
+ assignedRoles
54
+ }
55
+ }
56
+ }
57
+ """
58
+
59
+ ACTION_REQUEST_ASSIGNEES = """
60
+ query getActionRequestAssignee($input: GetActionRequestAssigneeInput!) {
61
+ getActionRequestAssignee(input: $input)
62
+ }
63
+ """
64
+
65
+
66
+ @platform_name(id="datahub", platform_name="DataHub")
67
+ @config_class(ActionRequestOwnerSourceConfig)
68
+ @support_status(SupportStatus.INCUBATING)
69
+ class ActionRequestOwnerSource(Source):
70
+ def __init__(self, config: ActionRequestOwnerSourceConfig, ctx: PipelineContext):
71
+ super().__init__(ctx)
72
+ self.config: ActionRequestOwnerSourceConfig = config
73
+ self.report = ActionRequestOwnerSourceReport()
74
+ self.graph = ctx.require_graph("Proposal Owner source")
75
+ self.event_not_produced_warn = False
76
+
77
+ def _process_action_request(
78
+ self, action_request: Dict
79
+ ) -> Optional[MetadataChangeProposalWrapper]:
80
+ self.report.processed_proposals += 1
81
+ action_request_urn = action_request.get("urn")
82
+ action_type = action_request.get("type")
83
+ action_request_entity = action_request.get("entity")
84
+ assert action_request_entity is not None
85
+ resource_urn = action_request_entity.get("urn")
86
+ sub_resource = action_request.get("subResource")
87
+ sub_resource_type = action_request.get("subResourceType")
88
+ assigned_users = action_request.get("assignedUsers")
89
+ assigned_groups = action_request.get("assignedGroups")
90
+ assigned_roles = action_request.get("assignedRoles")
91
+
92
+ correct_assignees = self.graph.execute_graphql(
93
+ query=ACTION_REQUEST_ASSIGNEES,
94
+ variables={
95
+ "input": {
96
+ "resourceUrn": resource_urn,
97
+ "actionRequestType": action_type,
98
+ "subResource": sub_resource,
99
+ "subResourceType": sub_resource_type,
100
+ }
101
+ },
102
+ ).get("getActionRequestAssignee")
103
+ if correct_assignees is None:
104
+ self.report.correct_assignees_not_found += 1
105
+ logger.error(
106
+ f"Correct assignees not found for action request {action_request_urn}"
107
+ )
108
+ return None
109
+ correct_users = [
110
+ x for x in correct_assignees if guess_entity_type(x) == "corpuser"
111
+ ]
112
+ correct_groups = [
113
+ x for x in correct_assignees if guess_entity_type(x) == "corpGroup"
114
+ ]
115
+ correct_roles = [
116
+ x for x in correct_assignees if guess_entity_type(x) == "dataHubRole"
117
+ ]
118
+ if (
119
+ assigned_users == correct_users
120
+ and assigned_groups == correct_groups
121
+ and assigned_roles == correct_roles
122
+ ):
123
+ self.report.correct_proposal_owners += 1
124
+ return None
125
+ action_request_info = self.graph.get_aspect_v2(
126
+ entity_urn=str(action_request_urn),
127
+ aspect="actionRequestInfo",
128
+ aspect_type=ActionRequestInfoClass,
129
+ )
130
+ if action_request_info is None:
131
+ self.report.action_request_info_not_found += 1
132
+ logger.error(
133
+ f"Action request info not found for action request {action_request_urn}"
134
+ )
135
+ return None
136
+ action_request_info.assignedUsers = correct_users
137
+ action_request_info.assignedGroups = correct_groups
138
+ action_request_info.assignedRoles = correct_roles
139
+ return MetadataChangeProposalWrapper(
140
+ entityUrn=action_request_urn, aspect=action_request_info
141
+ )
142
+
143
+ def _get_action_requests(self, start: int) -> List:
144
+ list_action_requests = self.graph.execute_graphql(
145
+ query=ACTION_REQUESTS,
146
+ variables={
147
+ "input": {
148
+ "status": "PENDING",
149
+ "allActionRequests": True,
150
+ "start": start,
151
+ "count": self.config.batch_size,
152
+ }
153
+ },
154
+ )
155
+ assert list_action_requests is not None
156
+ listActionRequests = list_action_requests.get("listActionRequests")
157
+ assert listActionRequests is not None
158
+ self.report.total_requests = listActionRequests.get("total", 0)
159
+ return listActionRequests.get("actionRequests", [])
160
+
161
+ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
162
+ start = 0
163
+ while True:
164
+ action_requests = self._get_action_requests(start)
165
+ if len(action_requests) == 0:
166
+ break
167
+ for action_request in action_requests:
168
+ result = self._process_action_request(action_request)
169
+ if result is not None:
170
+ yield result.as_workunit()
171
+ start += self.config.batch_size
172
+
173
+ def get_report(self) -> SourceReport:
174
+ return self.report
@@ -1 +1 @@
1
- from acryl_datahub_cloud.api.client import AcrylGraph
1
+ from acryl_datahub_cloud.api.client import AcrylGraph as AcrylGraph
@@ -1,6 +1,6 @@
1
- from acryl_datahub_cloud.api.entity_versioning import EntityVersioningAPI
2
1
  from datahub.ingestion.graph.client import DataHubGraph
3
2
 
4
3
 
5
- class AcrylGraph(EntityVersioningAPI, DataHubGraph):
4
+ # Add other graph mixins here when applicable
5
+ class AcrylGraph(DataHubGraph):
6
6
  pass
@@ -66,9 +66,9 @@ class DatasetRegistrationSpec(BaseModel):
66
66
 
67
67
  class FileStoreBackedDatasetConfig(ConfigModel):
68
68
  dataset_name: str
69
- dataset_urn: Optional[
70
- str
71
- ] = None # If not set, it will be generated from the dataset_name
69
+ dataset_urn: Optional[str] = (
70
+ None # If not set, it will be generated from the dataset_name
71
+ )
72
72
  bucket_prefix: str
73
73
  store_platform: str = "s3"
74
74
  file_name: str = "data"
@@ -79,9 +79,9 @@ class FileStoreBackedDatasetConfig(ConfigModel):
79
79
  generate_presigned_url: bool = True
80
80
  presigned_url_expiry_days: int = 7
81
81
  dataset_registration_spec: DatasetRegistrationSpec = DatasetRegistrationSpec()
82
- file: Optional[
83
- str
84
- ] = None # This is the file to be registered. When set, the file will be registered as a dataset immediately
82
+ file: Optional[str] = (
83
+ None # This is the file to be registered. When set, the file will be registered as a dataset immediately
84
+ )
85
85
 
86
86
  datahub_platform: str = "acryl"
87
87
 
@@ -10,11 +10,13 @@ from pydantic import BaseModel
10
10
  from acryl_datahub_cloud.elasticsearch.graph_service import BaseModelRow
11
11
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
12
12
  from datahub.ingestion.graph.client import DataHubGraph
13
- from datahub.ingestion.graph.filters import SearchFilterRule
13
+ from datahub.ingestion.graph.filters import RawSearchFilterRule
14
14
  from datahub.metadata.schema_classes import (
15
15
  DomainPropertiesClass,
16
+ FormAssociationClass,
16
17
  FormInfoClass,
17
18
  FormsClass,
19
+ FormStateClass,
18
20
  FormTypeClass,
19
21
  )
20
22
 
@@ -40,7 +42,7 @@ class FormType(str, Enum):
40
42
  class FormReportingRow(BaseModelRow):
41
43
  form_urn: str
42
44
  form_type: FormType
43
- form_assigned_date: date
45
+ form_assigned_date: Optional[date]
44
46
  form_completed_date: Optional[date]
45
47
  form_status: FormStatus
46
48
  question_id: str
@@ -147,7 +149,7 @@ class DataHubFormReportingData(FormData):
147
149
  )
148
150
  )
149
151
 
150
- def get_form_existence_or_filters(self) -> List[SearchFilterRule]:
152
+ def get_form_existence_or_filters(self) -> List[RawSearchFilterRule]:
151
153
  """
152
154
  Datasets must either have completedForms or incompleteForms assigned to
153
155
  them
@@ -177,24 +179,64 @@ class DataHubFormReportingData(FormData):
177
179
  },
178
180
  ]
179
181
 
182
+ def is_published(self, form_urn: str) -> bool:
183
+ form_info = self.form_registry.get_form(form_urn)
184
+ return (
185
+ form_info.status.state == FormStateClass.PUBLISHED if form_info else False
186
+ )
187
+
188
+ def form_published_time(self, form_urn: str) -> float:
189
+ form_info = self.form_registry.get_form(form_urn)
190
+ is_published = (
191
+ form_info.status.state == FormStateClass.PUBLISHED if form_info else False
192
+ )
193
+ return (
194
+ form_info.status.lastModified.time / 1000
195
+ if form_info and form_info.status.lastModified and is_published
196
+ else 0
197
+ )
198
+
199
+ def assigned_to_asset_time(self, form_association: FormAssociationClass) -> float:
200
+ return form_association.created.time / 1000 if form_association.created else 0
201
+
202
+ def assignment_time(self, form_association: FormAssociationClass) -> float:
203
+ published_time = self.form_published_time(form_association.urn)
204
+ assigned_to_asset_time = self.assigned_to_asset_time(form_association)
205
+ return max(published_time, assigned_to_asset_time)
206
+
207
+ # For a given asset, the assigned date is the more recent of the published date and the time this was actually assigned.
208
+ # Assets can be assigned before publishing, but we don't want to show that date because it's not open to the public yet.
209
+ # Assets can also be assigned after publishing, so we should show that date for those assets.
180
210
  def form_assigned_date(
181
211
  self, search_row: DataHubDatasetSearchRow
182
- ) -> Dict[str, date]:
183
- form_assigned_dates: Dict[str, date] = {}
212
+ ) -> Dict[str, Optional[date]]:
213
+ form_assigned_dates: Dict[str, Optional[date]] = {}
184
214
  forms = self.graph.get_aspect(search_row.urn, FormsClass)
185
215
  if not forms:
186
216
  return form_assigned_dates
187
217
  assert forms, f"Forms aspect not found for {search_row.urn}"
188
218
  for incomplete_form in forms.incompleteForms:
189
- form_assigned_dates[incomplete_form.urn] = datetime.fromtimestamp(
190
- incomplete_form.created.time / 1000 if incomplete_form.created else 0,
191
- tz=timezone.utc,
192
- ).date()
219
+ is_published = self.is_published(incomplete_form.urn)
220
+ assignment_time = self.assignment_time(incomplete_form)
221
+ form_assigned_dates[incomplete_form.urn] = (
222
+ datetime.fromtimestamp(
223
+ assignment_time,
224
+ tz=timezone.utc,
225
+ ).date()
226
+ if is_published and assignment_time != 0
227
+ else None
228
+ )
193
229
  for completed_form in forms.completedForms:
194
- form_assigned_dates[completed_form.urn] = datetime.fromtimestamp(
195
- completed_form.created.time / 1000 if completed_form.created else 0,
196
- tz=timezone.utc,
197
- ).date()
230
+ is_published = self.is_published(completed_form.urn)
231
+ assignment_time = self.assignment_time(completed_form)
232
+ form_assigned_dates[completed_form.urn] = (
233
+ datetime.fromtimestamp(
234
+ assignment_time,
235
+ tz=timezone.utc,
236
+ ).date()
237
+ if is_published and assignment_time != 0
238
+ else None
239
+ )
198
240
  return form_assigned_dates
199
241
 
200
242
  def form_completed_date(
@@ -206,7 +248,9 @@ class DataHubFormReportingData(FormData):
206
248
  form_completion_dates = {}
207
249
  for form in search_row.completedForms:
208
250
  form_info = self.form_registry.get_form(form)
209
- assert form_info, f"Form {form} not found"
251
+ if not form_info:
252
+ logger.warning(f"Found form attached that does not exist: {form}")
253
+ continue
210
254
  form_prompts = [x.id for x in form_info.prompts]
211
255
  completed_prompts_map = {
212
256
  prompt_id: response_time
@@ -291,7 +335,11 @@ class DataHubFormReportingData(FormData):
291
335
  on_form_scanned(form_id)
292
336
  forms_scanned.add(form_id)
293
337
  form_info = self.form_registry.get_form(form_id)
294
- assert form_info, f"Form {form_id} not found"
338
+ if not form_info:
339
+ logger.warning(
340
+ f"Found form attached that does not exist: {form_id}"
341
+ )
342
+ continue
295
343
  form_prompts = [x.id for x in form_info.prompts]
296
344
  form_incomplete_prompts = [
297
345
  p
@@ -315,11 +363,6 @@ class DataHubFormReportingData(FormData):
315
363
  if p in form_prompts
316
364
  ]:
317
365
  for owner in assignees:
318
- if form_id not in form_assigned_dates:
319
- logger.warning(
320
- f"Form {form_id} not found in form_assigned_dates"
321
- )
322
- continue
323
366
  yield FormReportingRow(
324
367
  form_urn=form_id,
325
368
  form_assigned_date=form_assigned_dates[form_id],
@@ -349,11 +392,6 @@ class DataHubFormReportingData(FormData):
349
392
  if p in form_prompts
350
393
  ]:
351
394
  for owner in assignees:
352
- if form_id not in form_assigned_dates:
353
- logger.warning(
354
- f"Form {form_id} not found in form_assigned_dates"
355
- )
356
- continue
357
395
  yield FormReportingRow(
358
396
  form_urn=form_id,
359
397
  form_assigned_date=form_assigned_dates[form_id],
@@ -393,7 +431,11 @@ class DataHubFormReportingData(FormData):
393
431
  on_form_scanned(form_id)
394
432
  forms_scanned.add(form_id)
395
433
  form_info = self.form_registry.get_form(form_id)
396
- assert form_info, f"Form {form_id} not found"
434
+ if not form_info:
435
+ logger.warning(
436
+ f"Found form attached that does not exist: {form_id}"
437
+ )
438
+ continue
397
439
  form_type = (
398
440
  FormType.DOCUMENTATION
399
441
  if form_info.type == FormTypeClass.COMPLETION
@@ -411,11 +453,8 @@ class DataHubFormReportingData(FormData):
411
453
  for prompt_id in [
412
454
  p
413
455
  for p in search_row.completedFormsIncompletePromptIds
414
- for p in form_prompts
456
+ if p in form_prompts
415
457
  ]:
416
- logger.warning(
417
- f"Unexpected incomplete prompt {prompt_id} in completed form {form_id}"
418
- )
419
458
  for owner in assignees:
420
459
  yield FormReportingRow(
421
460
  form_urn=form_id,
@@ -450,11 +489,6 @@ class DataHubFormReportingData(FormData):
450
489
  if p in form_prompts
451
490
  ]:
452
491
  for owner in assignees:
453
- if form_id not in form_assigned_dates:
454
- logger.warning(
455
- f"Form {form_id} not found in form_assigned_dates"
456
- )
457
- continue
458
492
  yield FormReportingRow(
459
493
  form_urn=form_id,
460
494
  form_assigned_date=form_assigned_dates[form_id],
@@ -54,9 +54,9 @@ class DataHubReportingExtractSQLSourceConfig(ConfigModel):
54
54
 
55
55
  if "file" not in v:
56
56
  default_config = FileStoreBackedDatasetConfig.dummy()
57
- v[
58
- "file"
59
- ] = f"{default_config.file_name}.{default_config.file_extension}"
57
+ v["file"] = (
58
+ f"{default_config.file_name}.{default_config.file_extension}"
59
+ )
60
60
  else:
61
61
  v["file_name"] = v["file"].split(".")[0]
62
62
  v["file_extension"] = v["file"].split(".")[-1]
@@ -133,7 +133,7 @@ class DataHubReportingExtractSQLSource(Source):
133
133
  tmp_dir_aux = (
134
134
  self.ctx.pipeline_name if self.ctx.pipeline_name else "sql_default_dir"
135
135
  )
136
- tmp_dir = f'/tmp/{tmp_dir_aux.replace(":", "_")}'
136
+ tmp_dir = f"/tmp/{tmp_dir_aux.replace(':', '_')}"
137
137
 
138
138
  output_file = (
139
139
  self.datahub_based_s3_dataset.config.file
@@ -37,7 +37,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
37
37
  self._add_patch(
38
38
  UsageFeaturesClass.ASPECT_NAME,
39
39
  "add",
40
- path="/queryCountLast30Days",
40
+ path=("queryCountLast30Days",),
41
41
  value=count,
42
42
  )
43
43
  return self
@@ -56,7 +56,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
56
56
  self._add_patch(
57
57
  UsageFeaturesClass.ASPECT_NAME,
58
58
  "add",
59
- path="/viewCountLast30Days",
59
+ path=("viewCountLast30Days",),
60
60
  value=count,
61
61
  )
62
62
  return self
@@ -73,7 +73,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
73
73
  self._add_patch(
74
74
  UsageFeaturesClass.ASPECT_NAME,
75
75
  "add",
76
- path="/viewCountTotal",
76
+ path=("viewCountTotal",),
77
77
  value=count,
78
78
  )
79
79
  return self
@@ -92,7 +92,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
92
92
  self._add_patch(
93
93
  UsageFeaturesClass.ASPECT_NAME,
94
94
  "add",
95
- path="/viewCountPercentileLast30Days",
95
+ path=("viewCountPercentileLast30Days",),
96
96
  value=count,
97
97
  )
98
98
  return self
@@ -111,7 +111,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
111
111
  self._add_patch(
112
112
  UsageFeaturesClass.ASPECT_NAME,
113
113
  "add",
114
- path="/usageCountLast30Days",
114
+ path=("usageCountLast30Days",),
115
115
  value=count,
116
116
  )
117
117
  return self
@@ -130,7 +130,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
130
130
  self._add_patch(
131
131
  UsageFeaturesClass.ASPECT_NAME,
132
132
  "add",
133
- path="/uniqueUserCountLast30Days",
133
+ path=("uniqueUserCountLast30Days",),
134
134
  value=count,
135
135
  )
136
136
  return self
@@ -149,7 +149,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
149
149
  self._add_patch(
150
150
  UsageFeaturesClass.ASPECT_NAME,
151
151
  "add",
152
- path="/writeCountLast30Days",
152
+ path=("writeCountLast30Days",),
153
153
  value=count,
154
154
  )
155
155
  return self
@@ -168,7 +168,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
168
168
  self._add_patch(
169
169
  UsageFeaturesClass.ASPECT_NAME,
170
170
  "add",
171
- path="/queryCountPercentileLast30Days",
171
+ path=("queryCountPercentileLast30Days",),
172
172
  value=count,
173
173
  )
174
174
  return self
@@ -187,7 +187,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
187
187
  self._add_patch(
188
188
  UsageFeaturesClass.ASPECT_NAME,
189
189
  "add",
190
- path="/queryCountRankLast30Days",
190
+ path=("queryCountRankLast30Days",),
191
191
  value=count,
192
192
  )
193
193
  return self
@@ -206,7 +206,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
206
206
  self._add_patch(
207
207
  UsageFeaturesClass.ASPECT_NAME,
208
208
  "add",
209
- path="/uniqueUserPercentileLast30Days",
209
+ path=("uniqueUserPercentileLast30Days",),
210
210
  value=count,
211
211
  )
212
212
  return self
@@ -225,7 +225,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
225
225
  self._add_patch(
226
226
  UsageFeaturesClass.ASPECT_NAME,
227
227
  "add",
228
- path="/uniqueUserRankLast30Days",
228
+ path=("uniqueUserRankLast30Days",),
229
229
  value=count,
230
230
  )
231
231
  return self
@@ -244,7 +244,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
244
244
  self._add_patch(
245
245
  UsageFeaturesClass.ASPECT_NAME,
246
246
  "add",
247
- path="/writeCountPercentileLast30Days",
247
+ path=("writeCountPercentileLast30Days",),
248
248
  value=count,
249
249
  )
250
250
  return self
@@ -263,7 +263,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
263
263
  self._add_patch(
264
264
  UsageFeaturesClass.ASPECT_NAME,
265
265
  "add",
266
- path="/writeCountRankLast30Days",
266
+ path=("writeCountRankLast30Days",),
267
267
  value=count,
268
268
  )
269
269
  return self
@@ -282,7 +282,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
282
282
  self._add_patch(
283
283
  UsageFeaturesClass.ASPECT_NAME,
284
284
  "add",
285
- path="/topUsersLast30Days",
285
+ path=("topUsersLast30Days",),
286
286
  value=users,
287
287
  )
288
288
  return self
@@ -301,7 +301,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
301
301
  self._add_patch(
302
302
  UsageFeaturesClass.ASPECT_NAME,
303
303
  "add",
304
- path="/sizeInBytesPercentile",
304
+ path=("sizeInBytesPercentile",),
305
305
  value=percentile,
306
306
  )
307
307
  return self
@@ -318,7 +318,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
318
318
  self._add_patch(
319
319
  UsageFeaturesClass.ASPECT_NAME,
320
320
  "add",
321
- path="/sizeInBytesRank",
321
+ path=("sizeInBytesRank",),
322
322
  value=rank,
323
323
  )
324
324
  return self
@@ -337,7 +337,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
337
337
  self._add_patch(
338
338
  UsageFeaturesClass.ASPECT_NAME,
339
339
  "add",
340
- path="/rowCountPercentile",
340
+ path=("rowCountPercentile",),
341
341
  value=percentile,
342
342
  )
343
343
  return self
@@ -356,7 +356,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
356
356
  self._add_patch(
357
357
  UsageFeaturesClass.ASPECT_NAME,
358
358
  "add",
359
- path="/usageSearchScoreMultiplier",
359
+ path=("usageSearchScoreMultiplier",),
360
360
  value=multiplier,
361
361
  )
362
362
  return self
@@ -375,7 +375,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
375
375
  self._add_patch(
376
376
  UsageFeaturesClass.ASPECT_NAME,
377
377
  "add",
378
- path="/usageFreshnessScoreMultiplier",
378
+ path=("usageFreshnessScoreMultiplier",),
379
379
  value=multiplier,
380
380
  )
381
381
  return self
@@ -394,7 +394,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
394
394
  self._add_patch(
395
395
  UsageFeaturesClass.ASPECT_NAME,
396
396
  "add",
397
- path="/customDatahubScoreMultiplier",
397
+ path=("customDatahubScoreMultiplier",),
398
398
  value=multiplier,
399
399
  )
400
400
  return self
@@ -413,7 +413,7 @@ class UsageFeaturePatchBuilder(MetadataPatchProposal):
413
413
  self._add_patch(
414
414
  UsageFeaturesClass.ASPECT_NAME,
415
415
  "add",
416
- path="/combinedSearchRankingMultiplier",
416
+ path=("combinedSearchRankingMultiplier",),
417
417
  value=multiplier,
418
418
  )
419
419
  return self