acryl-datahub-cloud 0.3.8.1rc2__py3-none-any.whl → 0.3.8.2rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +18 -1
- acryl_datahub_cloud/datahub_restore/__init__.py +0 -0
- acryl_datahub_cloud/datahub_restore/do_restore.py +85 -0
- acryl_datahub_cloud/datahub_restore/source.py +139 -0
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +1575 -1575
- acryl_datahub_cloud/metadata/schema.avsc +22843 -22830
- acryl_datahub_cloud/metadata/schema_classes.py +522 -505
- acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +13 -0
- acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +5 -5
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +18 -5
- acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2rc2.dist-info}/METADATA +50 -50
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2rc2.dist-info}/RECORD +17 -14
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2rc2.dist-info}/entry_points.txt +1 -0
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2rc2.dist-info}/top_level.txt +0 -0
|
@@ -315,6 +315,11 @@ class DataHubFormReportingData(FormData):
|
|
|
315
315
|
if p in form_prompts
|
|
316
316
|
]:
|
|
317
317
|
for owner in assignees:
|
|
318
|
+
if form_id not in form_assigned_dates:
|
|
319
|
+
logger.warning(
|
|
320
|
+
f"Form {form_id} not found in form_assigned_dates"
|
|
321
|
+
)
|
|
322
|
+
continue
|
|
318
323
|
yield FormReportingRow(
|
|
319
324
|
form_urn=form_id,
|
|
320
325
|
form_assigned_date=form_assigned_dates[form_id],
|
|
@@ -344,6 +349,11 @@ class DataHubFormReportingData(FormData):
|
|
|
344
349
|
if p in form_prompts
|
|
345
350
|
]:
|
|
346
351
|
for owner in assignees:
|
|
352
|
+
if form_id not in form_assigned_dates:
|
|
353
|
+
logger.warning(
|
|
354
|
+
f"Form {form_id} not found in form_assigned_dates"
|
|
355
|
+
)
|
|
356
|
+
continue
|
|
347
357
|
yield FormReportingRow(
|
|
348
358
|
form_urn=form_id,
|
|
349
359
|
form_assigned_date=form_assigned_dates[form_id],
|
|
@@ -403,7 +413,9 @@ class DataHubFormReportingData(FormData):
|
|
|
403
413
|
for p in search_row.completedFormsIncompletePromptIds
|
|
404
414
|
for p in form_prompts
|
|
405
415
|
]:
|
|
406
|
-
logger.warning(
|
|
416
|
+
logger.warning(
|
|
417
|
+
f"Unexpected incomplete prompt {prompt_id} in completed form {form_id}"
|
|
418
|
+
)
|
|
407
419
|
for owner in assignees:
|
|
408
420
|
yield FormReportingRow(
|
|
409
421
|
form_urn=form_id,
|
|
@@ -438,6 +450,11 @@ class DataHubFormReportingData(FormData):
|
|
|
438
450
|
if p in form_prompts
|
|
439
451
|
]:
|
|
440
452
|
for owner in assignees:
|
|
453
|
+
if form_id not in form_assigned_dates:
|
|
454
|
+
logger.warning(
|
|
455
|
+
f"Form {form_id} not found in form_assigned_dates"
|
|
456
|
+
)
|
|
457
|
+
continue
|
|
441
458
|
yield FormReportingRow(
|
|
442
459
|
form_urn=form_id,
|
|
443
460
|
form_assigned_date=form_assigned_dates[form_id],
|
|
File without changes
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from typing import Any, Dict, Optional, Union
|
|
2
|
+
|
|
3
|
+
from datahub.ingestion.graph.client import DataHubGraph
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _parse_response(response: Dict) -> Dict:
|
|
7
|
+
value_str = response["value"].strip("{}")
|
|
8
|
+
|
|
9
|
+
args_part = value_str.split(", result=")[0].replace("args=", "")
|
|
10
|
+
result_part = value_str.split(", result=")[1]
|
|
11
|
+
|
|
12
|
+
parsed = {"args": _parse_args(args_part), "result": _parse_result(result_part)}
|
|
13
|
+
return parsed
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _parse_dict(content: str) -> Dict:
|
|
17
|
+
content = content.strip("()").strip()
|
|
18
|
+
items = [item.strip() for item in content.split(", ")]
|
|
19
|
+
|
|
20
|
+
pairs = []
|
|
21
|
+
for item in items:
|
|
22
|
+
if "=" in item:
|
|
23
|
+
k, v = item.split("=", 1)
|
|
24
|
+
pairs.append((k.strip(), v.strip()))
|
|
25
|
+
|
|
26
|
+
return {k: _convert_value(v) for k, v in pairs}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _parse_args(args_str: str) -> Dict:
    """Parse the ``RestoreIndicesArgs(...)`` text into a dict.

    Only the ``RestoreIndicesArgs(`` prefix and its closing ``)`` are removed
    before the content is handed to ``_parse_dict``. Deleting every ``)`` in
    the string would corrupt values that contain parentheses themselves, such
    as ``urn=urn:li:dataset:(urn:li:dataPlatform:hive,foo,PROD)``.
    """
    wrapper_open = "RestoreIndicesArgs("
    content = args_str.strip()
    if content.startswith(wrapper_open):
        content = content[len(wrapper_open):]
        # The final ")" closes the wrapper only when the wrapper was opened.
        if content.endswith(")"):
            content = content[:-1]
    return _parse_dict(content.strip())
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _parse_result(result_str: str) -> Dict:
    """Parse the ``RestoreIndicesResult(...)`` text into a dict.

    Only the ``RestoreIndicesResult(`` prefix and its closing ``)`` are
    removed before the content is handed to ``_parse_dict``. Deleting every
    ``)`` in the string would corrupt values that contain parentheses
    themselves, such as a ``lastUrn`` of the form
    ``urn:li:dataset:(urn:li:dataPlatform:hive,foo,PROD)``.
    """
    wrapper_open = "RestoreIndicesResult("
    content = result_str.strip()
    if content.startswith(wrapper_open):
        content = content[len(wrapper_open):]
        # The final ")" closes the wrapper only when the wrapper was opened.
        if content.endswith(")"):
            content = content[:-1]
    return _parse_dict(content.strip())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _convert_value(v: str) -> Any:
|
|
40
|
+
if v == "null":
|
|
41
|
+
return None
|
|
42
|
+
elif v.isdigit():
|
|
43
|
+
return int(v)
|
|
44
|
+
elif v == "[]":
|
|
45
|
+
return []
|
|
46
|
+
elif v == "":
|
|
47
|
+
return ""
|
|
48
|
+
elif v == "false":
|
|
49
|
+
return False
|
|
50
|
+
elif v == "true":
|
|
51
|
+
return True
|
|
52
|
+
elif v.startswith("urn:"):
|
|
53
|
+
return v
|
|
54
|
+
return v
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def restore_indices(
    graph: DataHubGraph,
    start: int,
    batch_size: int,
    limit: int,
    urn: Optional[str] = None,
    urn_like: Optional[str] = None,
    aspect: Optional[str] = None,
) -> Dict:
    """Issue one ``restoreIndices`` operation request and parse the reply.

    Args:
        graph: client used to send the request; its ``config.server`` is the
            base of the request URL.
        start: sent as ``start``.
        batch_size: sent as ``batchSize``.
        limit: sent as ``limit``.
        urn: sent as ``urn`` when not ``None``.
        urn_like: sent as ``urnLike`` when not ``None``.
        aspect: sent as ``aspect`` when not ``None``.

    Returns:
        The dict produced by ``_parse_response`` for the reply.

    Raises:
        RuntimeError: if both ``urn`` and ``urn_like`` are ``None``.
    """
    if urn is None and urn_like is None:
        raise RuntimeError("Either urn or urn_like must be present")

    endpoint = f"{graph.config.server}/operations?action=restoreIndices"

    request_body: Dict[str, Union[str, int]] = {
        "start": start,
        "batchSize": batch_size,
        "limit": limit,
    }
    # Optional filters are only included when the caller supplied them.
    optional_fields = (("urnLike", urn_like), ("urn", urn), ("aspect", aspect))
    for field_name, field_value in optional_fields:
        if field_value is not None:
            request_body[field_name] = field_value

    raw_reply = graph._post_generic(url=endpoint, payload_dict=request_body)
    return _parse_response(raw_reply)
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
from functools import partial
|
|
4
|
+
from typing import Any, Dict, Iterable, List, Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import Field, root_validator
|
|
7
|
+
|
|
8
|
+
from acryl_datahub_cloud.datahub_restore.do_restore import restore_indices
|
|
9
|
+
from datahub.configuration.common import ConfigModel
|
|
10
|
+
from datahub.ingestion.api.common import PipelineContext
|
|
11
|
+
from datahub.ingestion.api.decorators import (
|
|
12
|
+
SupportStatus,
|
|
13
|
+
config_class,
|
|
14
|
+
platform_name,
|
|
15
|
+
support_status,
|
|
16
|
+
)
|
|
17
|
+
from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
|
|
18
|
+
from datahub.ingestion.api.source_helpers import auto_workunit_reporter
|
|
19
|
+
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
20
|
+
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
21
|
+
StatefulIngestionConfigBase,
|
|
22
|
+
StatefulIngestionReport,
|
|
23
|
+
StatefulIngestionSourceBase,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DataHubRestoreIndicesReport(StatefulIngestionReport):
    """Running totals collected by DataHubRestoreSource across restore_indices calls."""

    # Incremented once per restore_indices request.
    calls_made: int = 0
    # Sum of "aspectCheckMs" over all responses.
    aspect_check_ms: int = 0
    # Sum of "createRecordMs" over all responses.
    create_record_ms: int = 0
    # Sum of "timeGetRowMs" over all responses.
    time_get_rows_ms: int = 0
    # Sum of "timeSqlQueryMs" over all responses.
    time_sql_query_ms: int = 0
    # Sum of "rowsMigrated" over all responses.
    rows_migrated: int = 0
    # Sum of "sendMessageMs" over all responses.
    send_message_ms: int = 0
    # Sum of "timeUrnMs" over all responses.
    time_urn_ms: int = 0
    # Sum of "defaultAspectsCreated" over all responses.
    default_aspects_created: int = 0
    # Sum of "ignored" over all responses.
    ignored: int = 0
    # "lastAspect" of the most recent response (overwritten on every call).
    last_aspect: str = ""
    # "lastUrn" of the most recent response (overwritten on every call).
    last_urn: str = ""
    # Sum of "timeEntityRegistryCheckMs" over all responses.
    time_entity_registry_check_ms: int = 0
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class DataHubRestoreIndicesConfig(ConfigModel, StatefulIngestionConfigBase):
    """Settings for the restore-indices source.

    Exactly one of ``urn`` and ``urn_like`` has to be supplied; this is
    enforced by the pre-validation hook below.
    """

    urn: Optional[str] = Field(
        default=None,
        description="The urn of the entity to restore indices for. If not provided, urn_like must be provided.",
    )
    urn_like: Optional[str] = Field(
        default=None,
        description="The urn_like of the entity to restore indices for. If not provided, urn must be provided.",
    )
    start: int = Field(
        default=0,
        description="Same as restore indices endpoint.",
    )
    batch_size: int = Field(
        default=1,
        description="Same as restore indices endpoint.",
    )
    aspect: Optional[str] = Field(
        default=None,
        description="Same as restore indices endpoint.",
    )

    @root_validator(pre=True)
    def extract_assertion_info(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Reject configs that set neither or both of ``urn`` / ``urn_like``."""
        has_urn = values.get("urn") is not None
        has_urn_like = values.get("urn_like") is not None

        if not has_urn and not has_urn_like:
            raise ValueError("Either urn or urn_like must be provided.")
        if has_urn and has_urn_like:
            raise ValueError("Only one of urn or urn_like must be provided.")
        return values
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@platform_name(id="datahub", platform_name="datahub")
@config_class(DataHubRestoreIndicesConfig)
@support_status(SupportStatus.INCUBATING)
class DataHubRestoreSource(StatefulIngestionSourceBase):
    """Source that calls ``restore_indices`` batch by batch and fills its report.

    It yields no workunits; the outcome of every call is accumulated in a
    ``DataHubRestoreIndicesReport``.
    """

    def __init__(self, ctx: PipelineContext, config: DataHubRestoreIndicesConfig):
        super().__init__(config=config, ctx=ctx)
        self.report: DataHubRestoreIndicesReport = DataHubRestoreIndicesReport()
        # NOTE(review): presumably silences the framework warning about a run
        # that produced no events, since this source never yields any - confirm.
        self.report.event_not_produced_warn = False
        self.config = config
        self.graph = ctx.require_graph("The DataHubRestore Source")
        # Starting at 0.0 makes the first _print_report() call log immediately.
        self.last_print_time = 0.0

    def get_report(self) -> SourceReport:
        """Return the report object updated by this source."""
        return self.report

    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
        """A list of functions that transforms the workunits produced by this source.
        Run in order, first in list is applied first. Be careful with order when overriding.
        """

        return [
            partial(auto_workunit_reporter, self.get_report()),
        ]

    def _print_report(self) -> None:
        """Log the report, at most once every 60 seconds."""
        time_taken = round(time.time() - self.last_print_time, 1)
        if time_taken > 60:
            self.last_print_time = time.time()
            logger.info(f"\n{self.report.as_string()}")

    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
        """Call ``restore_indices`` until a batch comes back less than full.

        Every call requests ``batch_size`` rows starting at ``start`` and adds
        the returned counters to the report. ``start`` advances by
        ``batch_size`` after each full batch. No workunits are yielded.
        """
        batch_size = self.config.batch_size
        start = self.config.start
        while True:
            self.report.calls_made += 1
            response = restore_indices(
                graph=self.graph,
                start=start,
                batch_size=batch_size,
                limit=batch_size,
                urn=self.config.urn,
                urn_like=self.config.urn_like,
                aspect=self.config.aspect,
            )
            result = response["result"]
            self.report.aspect_check_ms += result["aspectCheckMs"]
            self.report.create_record_ms += result["createRecordMs"]
            self.report.rows_migrated += result["rowsMigrated"]
            self.report.send_message_ms += result["sendMessageMs"]
            self.report.time_get_rows_ms += result["timeGetRowMs"]
            self.report.time_sql_query_ms += result["timeSqlQueryMs"]
            self.report.time_urn_ms += result["timeUrnMs"]
            self.report.default_aspects_created += result["defaultAspectsCreated"]
            self.report.ignored += result["ignored"]
            self.report.time_entity_registry_check_ms += result[
                "timeEntityRegistryCheckMs"
            ]
            self.report.last_aspect = result["lastAspect"]
            self.report.last_urn = result["lastUrn"]
            self._print_report()

            # A full batch means more rows may remain. The batch_size > 0
            # guard prevents an endless loop: with batch_size 0, a response
            # reporting 0 migrated rows would compare equal on every pass
            # while start never advances.
            if batch_size <= 0 or result["rowsMigrated"] != batch_size:
                break
            start += batch_size

        # Keeps this method a generator even though nothing is emitted.
        yield from []
|