acryl-datahub 0.15.0.1rc11__py3-none-any.whl → 0.15.0.1rc13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub has been flagged by the registry.

Files changed (40)
  1. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/METADATA +2320 -2324
  2. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/RECORD +40 -39
  3. datahub/__init__.py +1 -1
  4. datahub/api/circuit_breaker/assertion_circuit_breaker.py +5 -4
  5. datahub/configuration/common.py +2 -5
  6. datahub/emitter/mce_builder.py +17 -1
  7. datahub/emitter/mcp_builder.py +2 -7
  8. datahub/emitter/mcp_patch_builder.py +2 -2
  9. datahub/emitter/rest_emitter.py +2 -2
  10. datahub/ingestion/api/closeable.py +3 -3
  11. datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +4 -7
  12. datahub/ingestion/api/report.py +4 -1
  13. datahub/ingestion/api/sink.py +4 -3
  14. datahub/ingestion/api/source_helpers.py +2 -6
  15. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
  16. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -20
  17. datahub/ingestion/source/datahub/datahub_kafka_reader.py +2 -1
  18. datahub/ingestion/source/gc/dataprocess_cleanup.py +23 -10
  19. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +159 -71
  20. datahub/ingestion/source/s3/source.py +1 -1
  21. datahub/ingestion/source/sql/hive.py +15 -0
  22. datahub/ingestion/source/sql/hive_metastore.py +7 -0
  23. datahub/ingestion/source/sql/mssql/source.py +1 -1
  24. datahub/ingestion/source/sql/sql_common.py +41 -102
  25. datahub/ingestion/source/sql/sql_generic_profiler.py +5 -6
  26. datahub/ingestion/source/sql/sql_report.py +2 -0
  27. datahub/ingestion/source/state/checkpoint.py +2 -1
  28. datahub/ingestion/source/tableau/tableau.py +1 -4
  29. datahub/ingestion/source/unity/proxy.py +8 -27
  30. datahub/metadata/_schema_classes.py +61 -1
  31. datahub/metadata/_urns/urn_defs.py +168 -168
  32. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
  33. datahub/metadata/schema.avsc +64 -29
  34. datahub/metadata/schemas/DataJobKey.avsc +2 -1
  35. datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
  36. datahub/utilities/time.py +8 -3
  37. datahub/utilities/urns/_urn_base.py +5 -7
  38. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/WHEEL +0 -0
  39. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/entry_points.txt +0 -0
  40. {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/top_level.txt +0 -0
datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py CHANGED
@@ -19,6 +19,8 @@ from .....schema_classes import CostCostClass
 from .....schema_classes import CostCostDiscriminatorClass
 from .....schema_classes import CostTypeClass
 from .....schema_classes import DataPlatformInstanceClass
+from .....schema_classes import DataTransformClass
+from .....schema_classes import DataTransformLogicClass
 from .....schema_classes import DeprecationClass
 from .....schema_classes import DocumentationClass
 from .....schema_classes import DocumentationAssociationClass
@@ -79,6 +81,8 @@ CostCost = CostCostClass
 CostCostDiscriminator = CostCostDiscriminatorClass
 CostType = CostTypeClass
 DataPlatformInstance = DataPlatformInstanceClass
+DataTransform = DataTransformClass
+DataTransformLogic = DataTransformLogicClass
 Deprecation = DeprecationClass
 Documentation = DocumentationClass
 DocumentationAssociation = DocumentationAssociationClass
datahub/metadata/schema.avsc CHANGED
@@ -400,6 +400,69 @@
     ],
     "doc": "Institutional memory of an entity. This is a way to link to relevant documentation and provide description of the documentation. Institutional or tribal knowledge is very important for users to leverage the entity."
   },
+  {
+    "type": "record",
+    "Aspect": {
+      "name": "dataTransformLogic"
+    },
+    "name": "DataTransformLogic",
+    "namespace": "com.linkedin.pegasus2avro.common",
+    "fields": [
+      {
+        "type": {
+          "type": "array",
+          "items": {
+            "type": "record",
+            "name": "DataTransform",
+            "namespace": "com.linkedin.pegasus2avro.common",
+            "fields": [
+              {
+                "type": [
+                  "null",
+                  {
+                    "type": "record",
+                    "name": "QueryStatement",
+                    "namespace": "com.linkedin.pegasus2avro.query",
+                    "fields": [
+                      {
+                        "type": "string",
+                        "name": "value",
+                        "doc": "The query text"
+                      },
+                      {
+                        "type": {
+                          "type": "enum",
+                          "symbolDocs": {
+                            "SQL": "A SQL Query"
+                          },
+                          "name": "QueryLanguage",
+                          "namespace": "com.linkedin.pegasus2avro.query",
+                          "symbols": [
+                            "SQL"
+                          ]
+                        },
+                        "name": "language",
+                        "default": "SQL",
+                        "doc": "The language of the Query, e.g. SQL."
+                      }
+                    ],
+                    "doc": "A query statement against one or more data assets."
+                  }
+                ],
+                "name": "queryStatement",
+                "default": null,
+                "doc": "The data transform may be defined by a query statement"
+              }
+            ],
+            "doc": "Information about a transformation. It may be a query,"
+          }
+        },
+        "name": "transforms",
+        "doc": "List of transformations applied"
+      }
+    ],
+    "doc": "Information about a Query against one or more data assets (e.g. Tables or Views)."
+  },
   {
     "type": "record",
     "Aspect": {
@@ -11947,35 +12010,7 @@
       "namespace": "com.linkedin.pegasus2avro.query",
       "fields": [
         {
-          "type": {
-            "type": "record",
-            "name": "QueryStatement",
-            "namespace": "com.linkedin.pegasus2avro.query",
-            "fields": [
-              {
-                "type": "string",
-                "name": "value",
-                "doc": "The query text"
-              },
-              {
-                "type": {
-                  "type": "enum",
-                  "symbolDocs": {
-                    "SQL": "A SQL Query"
-                  },
-                  "name": "QueryLanguage",
-                  "namespace": "com.linkedin.pegasus2avro.query",
-                  "symbols": [
-                    "SQL"
-                  ]
-                },
-                "name": "language",
-                "default": "SQL",
-                "doc": "The language of the Query, e.g. SQL."
-              }
-            ],
-            "doc": "A query statement against one or more data assets."
-          },
+          "type": "com.linkedin.pegasus2avro.query.QueryStatement",
           "name": "statement",
           "doc": "The Query Statement."
         },
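Note: Avro permits a named type to be defined only once per schema file. Since QueryStatement is now defined earlier in the file, inside the new DataTransform record, the Query aspect's statement field above switches to referencing it by fully-qualified name.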
datahub/metadata/schemas/DataJobKey.avsc CHANGED
@@ -25,7 +25,8 @@
       "forms",
       "subTypes",
       "incidentsSummary",
-      "testResults"
+      "testResults",
+      "dataTransformLogic"
     ]
   },
   "name": "DataJobKey",
datahub/metadata/schemas/DataTransformLogic.avsc ADDED
@@ -0,0 +1,63 @@
+{
+  "type": "record",
+  "Aspect": {
+    "name": "dataTransformLogic"
+  },
+  "name": "DataTransformLogic",
+  "namespace": "com.linkedin.pegasus2avro.common",
+  "fields": [
+    {
+      "type": {
+        "type": "array",
+        "items": {
+          "type": "record",
+          "name": "DataTransform",
+          "namespace": "com.linkedin.pegasus2avro.common",
+          "fields": [
+            {
+              "type": [
+                "null",
+                {
+                  "type": "record",
+                  "name": "QueryStatement",
+                  "namespace": "com.linkedin.pegasus2avro.query",
+                  "fields": [
+                    {
+                      "type": "string",
+                      "name": "value",
+                      "doc": "The query text"
+                    },
+                    {
+                      "type": {
+                        "type": "enum",
+                        "symbolDocs": {
+                          "SQL": "A SQL Query"
+                        },
+                        "name": "QueryLanguage",
+                        "namespace": "com.linkedin.pegasus2avro.query",
+                        "symbols": [
+                          "SQL"
+                        ]
+                      },
+                      "name": "language",
+                      "default": "SQL",
+                      "doc": "The language of the Query, e.g. SQL."
+                    }
+                  ],
+                  "doc": "A query statement against one or more data assets."
+                }
+              ],
+              "name": "queryStatement",
+              "default": null,
+              "doc": "The data transform may be defined by a query statement"
+            }
+          ],
+          "doc": "Information about a transformation. It may be a query,"
+        }
+      },
+      "name": "transforms",
+      "doc": "List of transformations applied"
+    }
+  ],
+  "doc": "Information about a Query against one or more data assets (e.g. Tables or Views)."
+}
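Per the DataJobKey change, the new dataTransformLogic aspect attaches to dataJob entities. A minimal sketch of emitting it with the generated classes; the orchestrator, flow, and job names are hypothetical:

from datahub.emitter.mce_builder import make_data_job_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
    DataTransformClass,
    DataTransformLogicClass,
    QueryLanguageClass,
    QueryStatementClass,
)

# Build the aspect: a list of transforms, each optionally backed by a query.
aspect = DataTransformLogicClass(
    transforms=[
        DataTransformClass(
            queryStatement=QueryStatementClass(
                value="INSERT INTO sales_daily SELECT * FROM sales_raw",
                language=QueryLanguageClass.SQL,
            )
        )
    ]
)

# Wrap it in an MCP against a (hypothetical) dataJob urn; send with any emitter.
mcp = MetadataChangeProposalWrapper(
    entityUrn=make_data_job_urn(
        orchestrator="airflow", flow_id="sales_dag", job_id="load_sales"
    ),
    aspect=aspect,
)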
datahub/utilities/time.py CHANGED
@@ -1,6 +1,8 @@
 import time
 from dataclasses import dataclass
-from datetime import datetime, timezone
+from datetime import datetime
+
+from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis
 
 
 def get_current_time_in_seconds() -> int:
@@ -9,12 +11,15 @@ def get_current_time_in_seconds() -> int:
 
 def ts_millis_to_datetime(ts_millis: int) -> datetime:
     """Converts input timestamp in milliseconds to a datetime object with UTC timezone"""
-    return datetime.fromtimestamp(ts_millis / 1000, tz=timezone.utc)
+    return parse_ts_millis(ts_millis)
 
 
 def datetime_to_ts_millis(dt: datetime) -> int:
     """Converts a datetime object to timestamp in milliseconds"""
-    return int(round(dt.timestamp() * 1000))
+    # TODO: Deprecate these helpers in favor of make_ts_millis and parse_ts_millis.
+    # The other ones support None with a typing overload.
+    # Also possibly move those helpers to this file.
+    return make_ts_millis(dt)
 
 
 @dataclass
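For reference, the mce_builder helpers that time.py now delegates to behave like the old inline code but also accept None via typing overloads (per the TODO above). A small sketch of the round-trip, assuming a timezone-aware input:

from datetime import datetime, timezone

from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis

dt = datetime(2024, 1, 2, 3, 4, 5, tzinfo=timezone.utc)
millis = make_ts_millis(dt)             # 1704164645000
assert parse_ts_millis(millis) == dt    # parses back to a UTC datetime
assert make_ts_millis(None) is None     # None passes through per the overloads
assert parse_ts_millis(None) is None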
@@ -1,9 +1,10 @@
1
1
  import functools
2
2
  import urllib.parse
3
3
  from abc import abstractmethod
4
- from typing import ClassVar, Dict, List, Optional, Type, TypeVar
4
+ from typing import ClassVar, Dict, List, Optional, Type
5
5
 
6
6
  from deprecated import deprecated
7
+ from typing_extensions import Self
7
8
 
8
9
  from datahub.utilities.urns.error import InvalidUrnError
9
10
 
@@ -42,9 +43,6 @@ def _split_entity_id(entity_id: str) -> List[str]:
42
43
  return parts
43
44
 
44
45
 
45
- _UrnSelf = TypeVar("_UrnSelf", bound="Urn")
46
-
47
-
48
46
  @functools.total_ordering
49
47
  class Urn:
50
48
  """
@@ -88,7 +86,7 @@ class Urn:
88
86
  return self._entity_ids
89
87
 
90
88
  @classmethod
91
- def from_string(cls: Type[_UrnSelf], urn_str: str) -> "_UrnSelf":
89
+ def from_string(cls, urn_str: str) -> Self:
92
90
  """
93
91
  Creates an Urn from its string representation.
94
92
 
@@ -174,7 +172,7 @@ class Urn:
174
172
 
175
173
  @classmethod
176
174
  @deprecated(reason="prefer .from_string")
177
- def create_from_string(cls: Type[_UrnSelf], urn_str: str) -> "_UrnSelf":
175
+ def create_from_string(cls, urn_str: str) -> Self:
178
176
  return cls.from_string(urn_str)
179
177
 
180
178
  @deprecated(reason="prefer .entity_ids")
@@ -270,5 +268,5 @@ class _SpecificUrn(Urn):
270
268
 
271
269
  @classmethod
272
270
  @abstractmethod
273
- def _parse_ids(cls: Type[_UrnSelf], entity_ids: List[str]) -> _UrnSelf:
271
+ def _parse_ids(cls, entity_ids: List[str]) -> Self:
274
272
  raise NotImplementedError()
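The typing_extensions.Self migration removes the per-class TypeVar bookkeeping while keeping subclass-aware return types. A toy sketch (not the real Urn hierarchy) of what Self buys:

from typing_extensions import Self


class Base:
    def __init__(self, raw: str) -> None:
        self.raw = raw

    @classmethod
    def from_string(cls, raw: str) -> Self:
        # cls is the concrete subclass, so the return type narrows to it.
        return cls(raw)


class Child(Base):
    pass


child = Child.from_string("urn:li:example:1")
# Type checkers infer `child: Child`, with no `_UrnSelf = TypeVar(...)` needed.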