acryl-datahub 1.0.0rc13__py3-none-any.whl → 1.0.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (45)
  1. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/METADATA +2540 -2540
  2. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/RECORD +45 -45
  3. datahub/_version.py +1 -1
  4. datahub/configuration/common.py +1 -1
  5. datahub/emitter/rest_emitter.py +165 -10
  6. datahub/ingestion/glossary/classification_mixin.py +1 -5
  7. datahub/ingestion/graph/client.py +6 -3
  8. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
  9. datahub/ingestion/run/pipeline.py +2 -4
  10. datahub/ingestion/sink/datahub_rest.py +4 -0
  11. datahub/ingestion/source/common/subtypes.py +5 -0
  12. datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
  13. datahub/ingestion/source/dbt/dbt_common.py +2 -4
  14. datahub/ingestion/source/dbt/dbt_tests.py +4 -8
  15. datahub/ingestion/source/dremio/dremio_api.py +1 -5
  16. datahub/ingestion/source/dremio/dremio_aspects.py +1 -4
  17. datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
  18. datahub/ingestion/source/ge_data_profiler.py +1 -1
  19. datahub/ingestion/source/kafka_connect/common.py +1 -6
  20. datahub/ingestion/source/mlflow.py +338 -31
  21. datahub/ingestion/source/redshift/lineage.py +2 -2
  22. datahub/ingestion/source/redshift/lineage_v2.py +19 -7
  23. datahub/ingestion/source/redshift/profile.py +1 -1
  24. datahub/ingestion/source/redshift/query.py +14 -6
  25. datahub/ingestion/source/redshift/redshift.py +9 -5
  26. datahub/ingestion/source/redshift/redshift_schema.py +27 -7
  27. datahub/ingestion/source/sql/athena.py +6 -12
  28. datahub/ingestion/source/sql/hive.py +2 -6
  29. datahub/ingestion/source/sql/hive_metastore.py +2 -1
  30. datahub/ingestion/source/sql/sql_common.py +3 -9
  31. datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
  32. datahub/ingestion/source/superset.py +1 -3
  33. datahub/ingestion/source/tableau/tableau_common.py +1 -1
  34. datahub/ingestion/source/unity/ge_profiler.py +2 -1
  35. datahub/lite/duckdb_lite.py +1 -3
  36. datahub/metadata/_schema_classes.py +31 -1
  37. datahub/metadata/schema.avsc +56 -4
  38. datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
  39. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
  40. datahub/sdk/dataset.py +2 -2
  41. datahub/sql_parsing/sqlglot_utils.py +1 -4
  42. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/LICENSE +0 -0
  43. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/WHEEL +0 -0
  44. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/entry_points.txt +0 -0
  45. {acryl_datahub-1.0.0rc13.dist-info → acryl_datahub-1.0.0rc15.dist-info}/top_level.txt +0 -0
datahub/metadata/schemas/DataProcessInstanceInput.avsc CHANGED
@@ -13,7 +13,6 @@
13
13
  "dataset",
14
14
  "mlModel"
15
15
  ],
16
- "isLineage": true,
17
16
  "name": "Consumes"
18
17
  }
19
18
  },
@@ -34,6 +33,135 @@
34
33
  "doc": "Input assets consumed",
35
34
  "Urn": "Urn",
36
35
  "urn_is_array": true
36
+ },
37
+ {
38
+ "Relationship": {
39
+ "/*/destinationUrn": {
40
+ "createdActor": "inputEdges/*/created/actor",
41
+ "createdOn": "inputEdges/*/created/time",
42
+ "entityTypes": [
43
+ "dataset",
44
+ "mlModel"
45
+ ],
46
+ "isLineage": true,
47
+ "name": "DataProcessInstanceConsumes",
48
+ "properties": "inputEdges/*/properties",
49
+ "updatedActor": "inputEdges/*/lastModified/actor",
50
+ "updatedOn": "inputEdges/*/lastModified/time"
51
+ }
52
+ },
53
+ "type": [
54
+ "null",
55
+ {
56
+ "type": "array",
57
+ "items": {
58
+ "type": "record",
59
+ "name": "Edge",
60
+ "namespace": "com.linkedin.pegasus2avro.common",
61
+ "fields": [
62
+ {
63
+ "java": {
64
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
65
+ },
66
+ "type": [
67
+ "null",
68
+ "string"
69
+ ],
70
+ "name": "sourceUrn",
71
+ "default": null,
72
+ "doc": "Urn of the source of this relationship edge.\nIf not specified, assumed to be the entity that this aspect belongs to.",
73
+ "Urn": "Urn"
74
+ },
75
+ {
76
+ "java": {
77
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
78
+ },
79
+ "type": "string",
80
+ "name": "destinationUrn",
81
+ "doc": "Urn of the destination of this relationship edge.",
82
+ "Urn": "Urn"
83
+ },
84
+ {
85
+ "type": [
86
+ "null",
87
+ {
88
+ "type": "record",
89
+ "name": "AuditStamp",
90
+ "namespace": "com.linkedin.pegasus2avro.common",
91
+ "fields": [
92
+ {
93
+ "type": "long",
94
+ "name": "time",
95
+ "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
96
+ },
97
+ {
98
+ "java": {
99
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
100
+ },
101
+ "type": "string",
102
+ "name": "actor",
103
+ "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
104
+ "Urn": "Urn"
105
+ },
106
+ {
107
+ "java": {
108
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
109
+ },
110
+ "type": [
111
+ "null",
112
+ "string"
113
+ ],
114
+ "name": "impersonator",
115
+ "default": null,
116
+ "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
117
+ "Urn": "Urn"
118
+ },
119
+ {
120
+ "type": [
121
+ "null",
122
+ "string"
123
+ ],
124
+ "name": "message",
125
+ "default": null,
126
+ "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
127
+ }
128
+ ],
129
+ "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
130
+ }
131
+ ],
132
+ "name": "created",
133
+ "default": null,
134
+ "doc": "Audit stamp containing who created this relationship edge and when"
135
+ },
136
+ {
137
+ "type": [
138
+ "null",
139
+ "com.linkedin.pegasus2avro.common.AuditStamp"
140
+ ],
141
+ "name": "lastModified",
142
+ "default": null,
143
+ "doc": "Audit stamp containing who last modified this relationship edge and when"
144
+ },
145
+ {
146
+ "type": [
147
+ "null",
148
+ {
149
+ "type": "map",
150
+ "values": "string"
151
+ }
152
+ ],
153
+ "name": "properties",
154
+ "default": null,
155
+ "doc": "A generic properties bag that allows us to store specific information on this graph edge."
156
+ }
157
+ ],
158
+ "doc": "A common structure to represent all edges to entities when used inside aspects as collections\nThis ensures that all edges have common structure around audit-stamps and will support PATCH, time-travel automatically."
159
+ }
160
+ }
161
+ ],
162
+ "name": "inputEdges",
163
+ "default": null,
164
+ "doc": "Input assets consumed by the data process instance, with additional metadata.\nCounts as lineage.\nWill eventually deprecate the inputs field."
37
165
  }
38
166
  ],
39
167
  "doc": "Information about the inputs datasets of a Data process"
datahub/metadata/schemas/DataProcessInstanceOutput.avsc CHANGED
@@ -13,8 +13,6 @@
13
13
  "dataset",
14
14
  "mlModel"
15
15
  ],
16
- "isLineage": true,
17
- "isUpstream": false,
18
16
  "name": "Produces"
19
17
  }
20
18
  },
@@ -32,9 +30,139 @@
32
30
  "items": "string"
33
31
  },
34
32
  "name": "outputs",
35
- "doc": "Output datasets to be produced",
33
+ "doc": "Output assets produced",
36
34
  "Urn": "Urn",
37
35
  "urn_is_array": true
36
+ },
37
+ {
38
+ "Relationship": {
39
+ "/*/destinationUrn": {
40
+ "createdActor": "outputEdges/*/created/actor",
41
+ "createdOn": "outputEdges/*/created/time",
42
+ "entityTypes": [
43
+ "dataset",
44
+ "mlModel"
45
+ ],
46
+ "isLineage": true,
47
+ "isUpstream": false,
48
+ "name": "DataProcessInstanceProduces",
49
+ "properties": "outputEdges/*/properties",
50
+ "updatedActor": "outputEdges/*/lastModified/actor",
51
+ "updatedOn": "outputEdges/*/lastModified/time"
52
+ }
53
+ },
54
+ "type": [
55
+ "null",
56
+ {
57
+ "type": "array",
58
+ "items": {
59
+ "type": "record",
60
+ "name": "Edge",
61
+ "namespace": "com.linkedin.pegasus2avro.common",
62
+ "fields": [
63
+ {
64
+ "java": {
65
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
66
+ },
67
+ "type": [
68
+ "null",
69
+ "string"
70
+ ],
71
+ "name": "sourceUrn",
72
+ "default": null,
73
+ "doc": "Urn of the source of this relationship edge.\nIf not specified, assumed to be the entity that this aspect belongs to.",
74
+ "Urn": "Urn"
75
+ },
76
+ {
77
+ "java": {
78
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
79
+ },
80
+ "type": "string",
81
+ "name": "destinationUrn",
82
+ "doc": "Urn of the destination of this relationship edge.",
83
+ "Urn": "Urn"
84
+ },
85
+ {
86
+ "type": [
87
+ "null",
88
+ {
89
+ "type": "record",
90
+ "name": "AuditStamp",
91
+ "namespace": "com.linkedin.pegasus2avro.common",
92
+ "fields": [
93
+ {
94
+ "type": "long",
95
+ "name": "time",
96
+ "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
97
+ },
98
+ {
99
+ "java": {
100
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
101
+ },
102
+ "type": "string",
103
+ "name": "actor",
104
+ "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
105
+ "Urn": "Urn"
106
+ },
107
+ {
108
+ "java": {
109
+ "class": "com.linkedin.pegasus2avro.common.urn.Urn"
110
+ },
111
+ "type": [
112
+ "null",
113
+ "string"
114
+ ],
115
+ "name": "impersonator",
116
+ "default": null,
117
+ "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
118
+ "Urn": "Urn"
119
+ },
120
+ {
121
+ "type": [
122
+ "null",
123
+ "string"
124
+ ],
125
+ "name": "message",
126
+ "default": null,
127
+ "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
128
+ }
129
+ ],
130
+ "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
131
+ }
132
+ ],
133
+ "name": "created",
134
+ "default": null,
135
+ "doc": "Audit stamp containing who created this relationship edge and when"
136
+ },
137
+ {
138
+ "type": [
139
+ "null",
140
+ "com.linkedin.pegasus2avro.common.AuditStamp"
141
+ ],
142
+ "name": "lastModified",
143
+ "default": null,
144
+ "doc": "Audit stamp containing who last modified this relationship edge and when"
145
+ },
146
+ {
147
+ "type": [
148
+ "null",
149
+ {
150
+ "type": "map",
151
+ "values": "string"
152
+ }
153
+ ],
154
+ "name": "properties",
155
+ "default": null,
156
+ "doc": "A generic properties bag that allows us to store specific information on this graph edge."
157
+ }
158
+ ],
159
+ "doc": "A common structure to represent all edges to entities when used inside aspects as collections\nThis ensures that all edges have common structure around audit-stamps and will support PATCH, time-travel automatically."
160
+ }
161
+ }
162
+ ],
163
+ "name": "outputEdges",
164
+ "default": null,
165
+ "doc": "Output assets produced by the data process instance during processing, with additional metadata.\nCounts as lineage.\nWill eventually deprecate the outputs field."
38
166
  }
39
167
  ],
40
168
  "doc": "Information about the outputs of a Data process"
datahub/sdk/dataset.py CHANGED
@@ -74,8 +74,8 @@ UpstreamLineageInputType: TypeAlias = Union[
74
74
  def _parse_upstream_input(
75
75
  upstream_input: UpstreamInputType,
76
76
  ) -> Union[models.UpstreamClass, models.FineGrainedLineageClass]:
77
- if isinstance(upstream_input, models.UpstreamClass) or isinstance(
78
- upstream_input, models.FineGrainedLineageClass
77
+ if isinstance(
78
+ upstream_input, (models.UpstreamClass, models.FineGrainedLineageClass)
79
79
  ):
80
80
  return upstream_input
81
81
  elif isinstance(upstream_input, (str, DatasetUrn)):
datahub/sql_parsing/sqlglot_utils.py CHANGED
@@ -56,10 +56,7 @@ def get_dialect(platform: DialectOrStr) -> sqlglot.Dialect:
56
56
  def is_dialect_instance(
57
57
  dialect: sqlglot.Dialect, platforms: Union[str, Iterable[str]]
58
58
  ) -> bool:
59
- if isinstance(platforms, str):
60
- platforms = [platforms]
61
- else:
62
- platforms = list(platforms)
59
+ platforms = [platforms] if isinstance(platforms, str) else list(platforms)
63
60
 
64
61
  dialects = [get_dialect(platform) for platform in platforms]
65
62