acryl-datahub 0.15.0.1rc11__py3-none-any.whl → 0.15.0.1rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/METADATA +2320 -2324
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/RECORD +40 -39
- datahub/__init__.py +1 -1
- datahub/api/circuit_breaker/assertion_circuit_breaker.py +5 -4
- datahub/configuration/common.py +2 -5
- datahub/emitter/mce_builder.py +17 -1
- datahub/emitter/mcp_builder.py +2 -7
- datahub/emitter/mcp_patch_builder.py +2 -2
- datahub/emitter/rest_emitter.py +2 -2
- datahub/ingestion/api/closeable.py +3 -3
- datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +4 -7
- datahub/ingestion/api/report.py +4 -1
- datahub/ingestion/api/sink.py +4 -3
- datahub/ingestion/api/source_helpers.py +2 -6
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -20
- datahub/ingestion/source/datahub/datahub_kafka_reader.py +2 -1
- datahub/ingestion/source/gc/dataprocess_cleanup.py +23 -10
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +159 -71
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/sql/hive.py +15 -0
- datahub/ingestion/source/sql/hive_metastore.py +7 -0
- datahub/ingestion/source/sql/mssql/source.py +1 -1
- datahub/ingestion/source/sql/sql_common.py +41 -102
- datahub/ingestion/source/sql/sql_generic_profiler.py +5 -6
- datahub/ingestion/source/sql/sql_report.py +2 -0
- datahub/ingestion/source/state/checkpoint.py +2 -1
- datahub/ingestion/source/tableau/tableau.py +1 -4
- datahub/ingestion/source/unity/proxy.py +8 -27
- datahub/metadata/_schema_classes.py +61 -1
- datahub/metadata/_urns/urn_defs.py +168 -168
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
- datahub/metadata/schema.avsc +64 -29
- datahub/metadata/schemas/DataJobKey.avsc +2 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
- datahub/utilities/time.py +8 -3
- datahub/utilities/urns/_urn_base.py +5 -7
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/top_level.txt +0 -0
|
@@ -19,6 +19,8 @@ from .....schema_classes import CostCostClass
|
|
|
19
19
|
from .....schema_classes import CostCostDiscriminatorClass
|
|
20
20
|
from .....schema_classes import CostTypeClass
|
|
21
21
|
from .....schema_classes import DataPlatformInstanceClass
|
|
22
|
+
from .....schema_classes import DataTransformClass
|
|
23
|
+
from .....schema_classes import DataTransformLogicClass
|
|
22
24
|
from .....schema_classes import DeprecationClass
|
|
23
25
|
from .....schema_classes import DocumentationClass
|
|
24
26
|
from .....schema_classes import DocumentationAssociationClass
|
|
@@ -79,6 +81,8 @@ CostCost = CostCostClass
|
|
|
79
81
|
CostCostDiscriminator = CostCostDiscriminatorClass
|
|
80
82
|
CostType = CostTypeClass
|
|
81
83
|
DataPlatformInstance = DataPlatformInstanceClass
|
|
84
|
+
DataTransform = DataTransformClass
|
|
85
|
+
DataTransformLogic = DataTransformLogicClass
|
|
82
86
|
Deprecation = DeprecationClass
|
|
83
87
|
Documentation = DocumentationClass
|
|
84
88
|
DocumentationAssociation = DocumentationAssociationClass
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -400,6 +400,69 @@
|
|
|
400
400
|
],
|
|
401
401
|
"doc": "Institutional memory of an entity. This is a way to link to relevant documentation and provide description of the documentation. Institutional or tribal knowledge is very important for users to leverage the entity."
|
|
402
402
|
},
|
|
403
|
+
{
|
|
404
|
+
"type": "record",
|
|
405
|
+
"Aspect": {
|
|
406
|
+
"name": "dataTransformLogic"
|
|
407
|
+
},
|
|
408
|
+
"name": "DataTransformLogic",
|
|
409
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
410
|
+
"fields": [
|
|
411
|
+
{
|
|
412
|
+
"type": {
|
|
413
|
+
"type": "array",
|
|
414
|
+
"items": {
|
|
415
|
+
"type": "record",
|
|
416
|
+
"name": "DataTransform",
|
|
417
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
418
|
+
"fields": [
|
|
419
|
+
{
|
|
420
|
+
"type": [
|
|
421
|
+
"null",
|
|
422
|
+
{
|
|
423
|
+
"type": "record",
|
|
424
|
+
"name": "QueryStatement",
|
|
425
|
+
"namespace": "com.linkedin.pegasus2avro.query",
|
|
426
|
+
"fields": [
|
|
427
|
+
{
|
|
428
|
+
"type": "string",
|
|
429
|
+
"name": "value",
|
|
430
|
+
"doc": "The query text"
|
|
431
|
+
},
|
|
432
|
+
{
|
|
433
|
+
"type": {
|
|
434
|
+
"type": "enum",
|
|
435
|
+
"symbolDocs": {
|
|
436
|
+
"SQL": "A SQL Query"
|
|
437
|
+
},
|
|
438
|
+
"name": "QueryLanguage",
|
|
439
|
+
"namespace": "com.linkedin.pegasus2avro.query",
|
|
440
|
+
"symbols": [
|
|
441
|
+
"SQL"
|
|
442
|
+
]
|
|
443
|
+
},
|
|
444
|
+
"name": "language",
|
|
445
|
+
"default": "SQL",
|
|
446
|
+
"doc": "The language of the Query, e.g. SQL."
|
|
447
|
+
}
|
|
448
|
+
],
|
|
449
|
+
"doc": "A query statement against one or more data assets."
|
|
450
|
+
}
|
|
451
|
+
],
|
|
452
|
+
"name": "queryStatement",
|
|
453
|
+
"default": null,
|
|
454
|
+
"doc": "The data transform may be defined by a query statement"
|
|
455
|
+
}
|
|
456
|
+
],
|
|
457
|
+
"doc": "Information about a transformation. It may be a query,"
|
|
458
|
+
}
|
|
459
|
+
},
|
|
460
|
+
"name": "transforms",
|
|
461
|
+
"doc": "List of transformations applied"
|
|
462
|
+
}
|
|
463
|
+
],
|
|
464
|
+
"doc": "Information about a Query against one or more data assets (e.g. Tables or Views)."
|
|
465
|
+
},
|
|
403
466
|
{
|
|
404
467
|
"type": "record",
|
|
405
468
|
"Aspect": {
|
|
@@ -11947,35 +12010,7 @@
|
|
|
11947
12010
|
"namespace": "com.linkedin.pegasus2avro.query",
|
|
11948
12011
|
"fields": [
|
|
11949
12012
|
{
|
|
11950
|
-
"type":
|
|
11951
|
-
"type": "record",
|
|
11952
|
-
"name": "QueryStatement",
|
|
11953
|
-
"namespace": "com.linkedin.pegasus2avro.query",
|
|
11954
|
-
"fields": [
|
|
11955
|
-
{
|
|
11956
|
-
"type": "string",
|
|
11957
|
-
"name": "value",
|
|
11958
|
-
"doc": "The query text"
|
|
11959
|
-
},
|
|
11960
|
-
{
|
|
11961
|
-
"type": {
|
|
11962
|
-
"type": "enum",
|
|
11963
|
-
"symbolDocs": {
|
|
11964
|
-
"SQL": "A SQL Query"
|
|
11965
|
-
},
|
|
11966
|
-
"name": "QueryLanguage",
|
|
11967
|
-
"namespace": "com.linkedin.pegasus2avro.query",
|
|
11968
|
-
"symbols": [
|
|
11969
|
-
"SQL"
|
|
11970
|
-
]
|
|
11971
|
-
},
|
|
11972
|
-
"name": "language",
|
|
11973
|
-
"default": "SQL",
|
|
11974
|
-
"doc": "The language of the Query, e.g. SQL."
|
|
11975
|
-
}
|
|
11976
|
-
],
|
|
11977
|
-
"doc": "A query statement against one or more data assets."
|
|
11978
|
-
},
|
|
12013
|
+
"type": "com.linkedin.pegasus2avro.query.QueryStatement",
|
|
11979
12014
|
"name": "statement",
|
|
11980
12015
|
"doc": "The Query Statement."
|
|
11981
12016
|
},
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "dataTransformLogic"
|
|
5
|
+
},
|
|
6
|
+
"name": "DataTransformLogic",
|
|
7
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
8
|
+
"fields": [
|
|
9
|
+
{
|
|
10
|
+
"type": {
|
|
11
|
+
"type": "array",
|
|
12
|
+
"items": {
|
|
13
|
+
"type": "record",
|
|
14
|
+
"name": "DataTransform",
|
|
15
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
16
|
+
"fields": [
|
|
17
|
+
{
|
|
18
|
+
"type": [
|
|
19
|
+
"null",
|
|
20
|
+
{
|
|
21
|
+
"type": "record",
|
|
22
|
+
"name": "QueryStatement",
|
|
23
|
+
"namespace": "com.linkedin.pegasus2avro.query",
|
|
24
|
+
"fields": [
|
|
25
|
+
{
|
|
26
|
+
"type": "string",
|
|
27
|
+
"name": "value",
|
|
28
|
+
"doc": "The query text"
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"type": {
|
|
32
|
+
"type": "enum",
|
|
33
|
+
"symbolDocs": {
|
|
34
|
+
"SQL": "A SQL Query"
|
|
35
|
+
},
|
|
36
|
+
"name": "QueryLanguage",
|
|
37
|
+
"namespace": "com.linkedin.pegasus2avro.query",
|
|
38
|
+
"symbols": [
|
|
39
|
+
"SQL"
|
|
40
|
+
]
|
|
41
|
+
},
|
|
42
|
+
"name": "language",
|
|
43
|
+
"default": "SQL",
|
|
44
|
+
"doc": "The language of the Query, e.g. SQL."
|
|
45
|
+
}
|
|
46
|
+
],
|
|
47
|
+
"doc": "A query statement against one or more data assets."
|
|
48
|
+
}
|
|
49
|
+
],
|
|
50
|
+
"name": "queryStatement",
|
|
51
|
+
"default": null,
|
|
52
|
+
"doc": "The data transform may be defined by a query statement"
|
|
53
|
+
}
|
|
54
|
+
],
|
|
55
|
+
"doc": "Information about a transformation. It may be a query,"
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
"name": "transforms",
|
|
59
|
+
"doc": "List of transformations applied"
|
|
60
|
+
}
|
|
61
|
+
],
|
|
62
|
+
"doc": "Information about a Query against one or more data assets (e.g. Tables or Views)."
|
|
63
|
+
}
|
datahub/utilities/time.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import time
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from datetime import datetime, timezone
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
def get_current_time_in_seconds() -> int:
|
|
@@ -9,12 +11,15 @@ def get_current_time_in_seconds() -> int:
|
|
|
9
11
|
|
|
10
12
|
def ts_millis_to_datetime(ts_millis: int) -> datetime:
|
|
11
13
|
"""Converts input timestamp in milliseconds to a datetime object with UTC timezone"""
|
|
12
|
-
return
|
|
14
|
+
return parse_ts_millis(ts_millis)
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
def datetime_to_ts_millis(dt: datetime) -> int:
|
|
16
18
|
"""Converts a datetime object to timestamp in milliseconds"""
|
|
17
|
-
|
|
19
|
+
# TODO: Deprecate these helpers in favor of make_ts_millis and parse_ts_millis.
|
|
20
|
+
# The other ones support None with a typing overload.
|
|
21
|
+
# Also possibly move those helpers to this file.
|
|
22
|
+
return make_ts_millis(dt)
|
|
18
23
|
|
|
19
24
|
|
|
20
25
|
@dataclass
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import functools
|
|
2
2
|
import urllib.parse
|
|
3
3
|
from abc import abstractmethod
|
|
4
|
-
from typing import ClassVar, Dict, List, Optional, Type, TypeVar
|
|
4
|
+
from typing import ClassVar, Dict, List, Optional, Type
|
|
5
5
|
|
|
6
6
|
from deprecated import deprecated
|
|
7
|
+
from typing_extensions import Self
|
|
7
8
|
|
|
8
9
|
from datahub.utilities.urns.error import InvalidUrnError
|
|
9
10
|
|
|
@@ -42,9 +43,6 @@ def _split_entity_id(entity_id: str) -> List[str]:
|
|
|
42
43
|
return parts
|
|
43
44
|
|
|
44
45
|
|
|
45
|
-
_UrnSelf = TypeVar("_UrnSelf", bound="Urn")
|
|
46
|
-
|
|
47
|
-
|
|
48
46
|
@functools.total_ordering
|
|
49
47
|
class Urn:
|
|
50
48
|
"""
|
|
@@ -88,7 +86,7 @@ class Urn:
|
|
|
88
86
|
return self._entity_ids
|
|
89
87
|
|
|
90
88
|
@classmethod
|
|
91
|
-
def from_string(cls
|
|
89
|
+
def from_string(cls, urn_str: str) -> Self:
|
|
92
90
|
"""
|
|
93
91
|
Creates an Urn from its string representation.
|
|
94
92
|
|
|
@@ -174,7 +172,7 @@ class Urn:
|
|
|
174
172
|
|
|
175
173
|
@classmethod
|
|
176
174
|
@deprecated(reason="prefer .from_string")
|
|
177
|
-
def create_from_string(cls
|
|
175
|
+
def create_from_string(cls, urn_str: str) -> Self:
|
|
178
176
|
return cls.from_string(urn_str)
|
|
179
177
|
|
|
180
178
|
@deprecated(reason="prefer .entity_ids")
|
|
@@ -270,5 +268,5 @@ class _SpecificUrn(Urn):
|
|
|
270
268
|
|
|
271
269
|
@classmethod
|
|
272
270
|
@abstractmethod
|
|
273
|
-
def _parse_ids(cls
|
|
271
|
+
def _parse_ids(cls, entity_ids: List[str]) -> Self:
|
|
274
272
|
raise NotImplementedError()
|
|
File without changes
|
{acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|