acryl-datahub 1.1.0.5rc6__py3-none-any.whl → 1.1.0.5rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (78)
  1. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/METADATA +2515 -2517
  2. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/RECORD +78 -75
  3. datahub/_version.py +1 -1
  4. datahub/cli/check_cli.py +0 -7
  5. datahub/cli/cli_utils.py +73 -0
  6. datahub/cli/delete_cli.py +0 -6
  7. datahub/cli/docker_check.py +107 -12
  8. datahub/cli/docker_cli.py +148 -228
  9. datahub/cli/exists_cli.py +0 -4
  10. datahub/cli/get_cli.py +0 -4
  11. datahub/cli/ingest_cli.py +1 -20
  12. datahub/cli/put_cli.py +0 -6
  13. datahub/cli/quickstart_versioning.py +50 -5
  14. datahub/cli/specific/assertions_cli.py +0 -6
  15. datahub/cli/specific/datacontract_cli.py +0 -6
  16. datahub/cli/specific/dataproduct_cli.py +0 -22
  17. datahub/cli/specific/dataset_cli.py +0 -11
  18. datahub/cli/specific/forms_cli.py +0 -6
  19. datahub/cli/specific/group_cli.py +0 -4
  20. datahub/cli/specific/structuredproperties_cli.py +0 -7
  21. datahub/cli/specific/user_cli.py +0 -4
  22. datahub/cli/state_cli.py +0 -4
  23. datahub/cli/timeline_cli.py +0 -4
  24. datahub/entrypoints.py +4 -3
  25. datahub/ingestion/api/report.py +183 -35
  26. datahub/ingestion/autogenerated/capability_summary.json +3431 -0
  27. datahub/ingestion/autogenerated/lineage.json +401 -0
  28. datahub/ingestion/autogenerated/lineage_helper.py +30 -128
  29. datahub/ingestion/extractor/schema_util.py +13 -4
  30. datahub/ingestion/graph/client.py +2 -2
  31. datahub/ingestion/run/pipeline.py +47 -1
  32. datahub/ingestion/source/bigquery_v2/bigquery.py +32 -23
  33. datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
  34. datahub/ingestion/source/common/subtypes.py +1 -1
  35. datahub/ingestion/source/data_lake_common/object_store.py +40 -0
  36. datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
  37. datahub/ingestion/source/dremio/dremio_source.py +7 -7
  38. datahub/ingestion/source/gcs/gcs_source.py +13 -2
  39. datahub/ingestion/source/ge_data_profiler.py +28 -20
  40. datahub/ingestion/source/identity/okta.py +0 -13
  41. datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
  42. datahub/ingestion/source/mock_data/datahub_mock_data.py +45 -0
  43. datahub/ingestion/source/powerbi/powerbi.py +0 -5
  44. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  45. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  46. datahub/ingestion/source/redshift/usage.py +4 -3
  47. datahub/ingestion/source/s3/source.py +19 -3
  48. datahub/ingestion/source/sigma/sigma.py +6 -1
  49. datahub/ingestion/source/snowflake/snowflake_config.py +11 -0
  50. datahub/ingestion/source/snowflake/snowflake_queries.py +147 -61
  51. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  52. datahub/ingestion/source/snowflake/snowflake_v2.py +11 -1
  53. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  54. datahub/ingestion/source/sql/hive_metastore.py +0 -10
  55. datahub/ingestion/source/sql/sql_common.py +4 -0
  56. datahub/ingestion/source/sql/vertica.py +0 -4
  57. datahub/ingestion/source/sql_queries.py +2 -2
  58. datahub/ingestion/source/superset.py +56 -1
  59. datahub/ingestion/source/tableau/tableau.py +40 -34
  60. datahub/ingestion/source/tableau/tableau_constant.py +0 -2
  61. datahub/ingestion/source/unity/proxy.py +4 -3
  62. datahub/ingestion/source/unity/source.py +19 -9
  63. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  64. datahub/metadata/_internal_schema_classes.py +85 -4
  65. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  66. datahub/metadata/schema.avsc +54 -1
  67. datahub/metadata/schemas/CorpUserSettings.avsc +17 -1
  68. datahub/metadata/schemas/GlobalSettingsInfo.avsc +37 -0
  69. datahub/sdk/lineage_client.py +2 -0
  70. datahub/sql_parsing/sql_parsing_aggregator.py +24 -15
  71. datahub/sql_parsing/sqlglot_lineage.py +40 -13
  72. datahub/upgrade/upgrade.py +46 -13
  73. datahub/utilities/server_config_util.py +8 -0
  74. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  75. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/WHEEL +0 -0
  76. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/entry_points.txt +0 -0
  77. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/licenses/LICENSE +0 -0
  78. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc8.dist-info}/top_level.txt +0 -0
datahub/ingestion/autogenerated/lineage.json (new file)
@@ -0,0 +1,401 @@
1
+ {
2
+ "entities": {
3
+ "dataJob": {
4
+ "dataJobInputOutput": {
5
+ "aspect": "dataJobInputOutput",
6
+ "fields": [
7
+ {
8
+ "name": "inputDatasets",
9
+ "path": "inputDatasets",
10
+ "isLineage": true,
11
+ "relationship": {
12
+ "name": "Consumes",
13
+ "entityTypes": [
14
+ "dataset"
15
+ ],
16
+ "isLineage": true
17
+ }
18
+ },
19
+ {
20
+ "name": "inputDatasetEdges",
21
+ "path": "inputDatasetEdges",
22
+ "isLineage": true,
23
+ "relationship": {
24
+ "name": "Consumes",
25
+ "entityTypes": [
26
+ "dataset"
27
+ ],
28
+ "isLineage": true
29
+ }
30
+ },
31
+ {
32
+ "name": "outputDatasets",
33
+ "path": "outputDatasets",
34
+ "isLineage": true,
35
+ "relationship": {
36
+ "name": "Produces",
37
+ "entityTypes": [
38
+ "dataset"
39
+ ],
40
+ "isLineage": true
41
+ }
42
+ },
43
+ {
44
+ "name": "outputDatasetEdges",
45
+ "path": "outputDatasetEdges",
46
+ "isLineage": true,
47
+ "relationship": {
48
+ "name": "Produces",
49
+ "entityTypes": [
50
+ "dataset"
51
+ ],
52
+ "isLineage": true
53
+ }
54
+ },
55
+ {
56
+ "name": "inputDatajobs",
57
+ "path": "inputDatajobs",
58
+ "isLineage": true,
59
+ "relationship": {
60
+ "name": "DownstreamOf",
61
+ "entityTypes": [
62
+ "dataJob"
63
+ ],
64
+ "isLineage": true
65
+ }
66
+ },
67
+ {
68
+ "name": "inputDatajobEdges",
69
+ "path": "inputDatajobEdges",
70
+ "isLineage": true,
71
+ "relationship": {
72
+ "name": "DownstreamOf",
73
+ "entityTypes": [
74
+ "dataJob"
75
+ ],
76
+ "isLineage": true
77
+ }
78
+ }
79
+ ]
80
+ }
81
+ },
82
+ "dataProcessInstance": {
83
+ "dataProcessInstanceOutput": {
84
+ "aspect": "dataProcessInstanceOutput",
85
+ "fields": [
86
+ {
87
+ "name": "outputEdges",
88
+ "path": "outputEdges",
89
+ "isLineage": true,
90
+ "relationship": {
91
+ "name": "DataProcessInstanceProduces",
92
+ "entityTypes": [
93
+ "dataset",
94
+ "mlModel",
95
+ "dataProcessInstance"
96
+ ],
97
+ "isLineage": true
98
+ }
99
+ }
100
+ ]
101
+ },
102
+ "dataProcessInstanceInput": {
103
+ "aspect": "dataProcessInstanceInput",
104
+ "fields": [
105
+ {
106
+ "name": "inputEdges",
107
+ "path": "inputEdges",
108
+ "isLineage": true,
109
+ "relationship": {
110
+ "name": "DataProcessInstanceConsumes",
111
+ "entityTypes": [
112
+ "dataset",
113
+ "mlModel",
114
+ "dataProcessInstance"
115
+ ],
116
+ "isLineage": true
117
+ }
118
+ }
119
+ ]
120
+ }
121
+ },
122
+ "dataProcess": {
123
+ "dataProcessInfo": {
124
+ "aspect": "dataProcessInfo",
125
+ "fields": [
126
+ {
127
+ "name": "inputs",
128
+ "path": "inputs",
129
+ "isLineage": true,
130
+ "relationship": {
131
+ "name": "Consumes",
132
+ "entityTypes": [
133
+ "dataset"
134
+ ],
135
+ "isLineage": true
136
+ }
137
+ },
138
+ {
139
+ "name": "outputs",
140
+ "path": "outputs",
141
+ "isLineage": true,
142
+ "relationship": {
143
+ "name": "Consumes",
144
+ "entityTypes": [
145
+ "dataset"
146
+ ],
147
+ "isLineage": true
148
+ }
149
+ }
150
+ ]
151
+ }
152
+ },
153
+ "dataset": {
154
+ "upstreamLineage": {
155
+ "aspect": "upstreamLineage",
156
+ "fields": [
157
+ {
158
+ "name": "dataset",
159
+ "path": "upstreams.dataset",
160
+ "isLineage": true,
161
+ "relationship": {
162
+ "name": "DownstreamOf",
163
+ "entityTypes": [
164
+ "dataset"
165
+ ],
166
+ "isLineage": true
167
+ }
168
+ }
169
+ ]
170
+ }
171
+ },
172
+ "chart": {
173
+ "chartInfo": {
174
+ "aspect": "chartInfo",
175
+ "fields": [
176
+ {
177
+ "name": "inputs",
178
+ "path": "inputs",
179
+ "isLineage": true,
180
+ "relationship": {
181
+ "name": "Consumes",
182
+ "entityTypes": [
183
+ "dataset"
184
+ ],
185
+ "isLineage": true
186
+ }
187
+ },
188
+ {
189
+ "name": "inputEdges",
190
+ "path": "inputEdges",
191
+ "isLineage": true,
192
+ "relationship": {
193
+ "name": "Consumes",
194
+ "entityTypes": [
195
+ "dataset"
196
+ ],
197
+ "isLineage": true
198
+ }
199
+ }
200
+ ]
201
+ }
202
+ },
203
+ "dashboard": {
204
+ "dashboardInfo": {
205
+ "aspect": "dashboardInfo",
206
+ "fields": [
207
+ {
208
+ "name": "charts",
209
+ "path": "charts",
210
+ "isLineage": true,
211
+ "relationship": {
212
+ "name": "Contains",
213
+ "entityTypes": [
214
+ "chart"
215
+ ],
216
+ "isLineage": true
217
+ }
218
+ },
219
+ {
220
+ "name": "chartEdges",
221
+ "path": "chartEdges",
222
+ "isLineage": true,
223
+ "relationship": {
224
+ "name": "Contains",
225
+ "entityTypes": [
226
+ "chart"
227
+ ],
228
+ "isLineage": true
229
+ }
230
+ },
231
+ {
232
+ "name": "datasets",
233
+ "path": "datasets",
234
+ "isLineage": true,
235
+ "relationship": {
236
+ "name": "Consumes",
237
+ "entityTypes": [
238
+ "dataset"
239
+ ],
240
+ "isLineage": true
241
+ }
242
+ },
243
+ {
244
+ "name": "datasetEdges",
245
+ "path": "datasetEdges",
246
+ "isLineage": true,
247
+ "relationship": {
248
+ "name": "Consumes",
249
+ "entityTypes": [
250
+ "dataset"
251
+ ],
252
+ "isLineage": true
253
+ }
254
+ },
255
+ {
256
+ "name": "dashboards",
257
+ "path": "dashboards",
258
+ "isLineage": true,
259
+ "relationship": {
260
+ "name": "DashboardContainsDashboard",
261
+ "entityTypes": [
262
+ "dashboard"
263
+ ],
264
+ "isLineage": true
265
+ }
266
+ }
267
+ ]
268
+ }
269
+ },
270
+ "mlModelGroup": {
271
+ "mlModelGroupProperties": {
272
+ "aspect": "mlModelGroupProperties",
273
+ "fields": [
274
+ {
275
+ "name": "trainingJobs",
276
+ "path": "trainingJobs",
277
+ "isLineage": true,
278
+ "relationship": {
279
+ "name": "TrainedBy",
280
+ "entityTypes": [
281
+ "dataJob",
282
+ "dataProcessInstance"
283
+ ],
284
+ "isLineage": true
285
+ }
286
+ },
287
+ {
288
+ "name": "downstreamJobs",
289
+ "path": "downstreamJobs",
290
+ "isLineage": true,
291
+ "relationship": {
292
+ "name": "UsedBy",
293
+ "entityTypes": [
294
+ "dataJob",
295
+ "dataProcessInstance"
296
+ ],
297
+ "isLineage": true
298
+ }
299
+ }
300
+ ]
301
+ }
302
+ },
303
+ "mlFeature": {
304
+ "mlFeatureProperties": {
305
+ "aspect": "mlFeatureProperties",
306
+ "fields": [
307
+ {
308
+ "name": "sources",
309
+ "path": "sources",
310
+ "isLineage": true,
311
+ "relationship": {
312
+ "name": "DerivedFrom",
313
+ "entityTypes": [
314
+ "dataset"
315
+ ],
316
+ "isLineage": true
317
+ }
318
+ }
319
+ ]
320
+ }
321
+ },
322
+ "mlPrimaryKey": {
323
+ "mlPrimaryKeyProperties": {
324
+ "aspect": "mlPrimaryKeyProperties",
325
+ "fields": [
326
+ {
327
+ "name": "sources",
328
+ "path": "sources",
329
+ "isLineage": true,
330
+ "relationship": {
331
+ "name": "DerivedFrom",
332
+ "entityTypes": [
333
+ "dataset"
334
+ ],
335
+ "isLineage": true
336
+ }
337
+ }
338
+ ]
339
+ }
340
+ },
341
+ "mlModel": {
342
+ "mlModelProperties": {
343
+ "aspect": "mlModelProperties",
344
+ "fields": [
345
+ {
346
+ "name": "trainingJobs",
347
+ "path": "trainingJobs",
348
+ "isLineage": true,
349
+ "relationship": {
350
+ "name": "TrainedBy",
351
+ "entityTypes": [
352
+ "dataJob",
353
+ "dataProcessInstance"
354
+ ],
355
+ "isLineage": true
356
+ }
357
+ },
358
+ {
359
+ "name": "downstreamJobs",
360
+ "path": "downstreamJobs",
361
+ "isLineage": true,
362
+ "relationship": {
363
+ "name": "UsedBy",
364
+ "entityTypes": [
365
+ "dataJob",
366
+ "dataProcessInstance"
367
+ ],
368
+ "isLineage": true
369
+ }
370
+ },
371
+ {
372
+ "name": "mlFeatures",
373
+ "path": "mlFeatures",
374
+ "isLineage": true,
375
+ "relationship": {
376
+ "name": "Consumes",
377
+ "entityTypes": [
378
+ "mlFeature"
379
+ ],
380
+ "isLineage": true
381
+ }
382
+ },
383
+ {
384
+ "name": "groups",
385
+ "path": "groups",
386
+ "isLineage": true,
387
+ "relationship": {
388
+ "name": "MemberOf",
389
+ "entityTypes": [
390
+ "mlModelGroup"
391
+ ],
392
+ "isLineage": true
393
+ }
394
+ }
395
+ ]
396
+ }
397
+ }
398
+ },
399
+ "generated_by": "metadata-ingestion/scripts/modeldocgen.py",
400
+ "generated_at": "2025-07-01T10:49:03.713749+00:00"
401
+ }
datahub/ingestion/autogenerated/lineage_helper.py
@@ -1,9 +1,8 @@
1
1
  import json
2
2
  import logging
3
+ from functools import lru_cache
3
4
  from pathlib import Path
4
- from typing import Any, Dict, List, Optional, Set
5
-
6
- from datahub.utilities.urns.urn import guess_entity_type
5
+ from typing import Dict, List, Optional
7
6
 
8
7
  logger = logging.getLogger(__name__)
9
8
 
@@ -18,10 +17,9 @@ def _load_lineage_data() -> Dict:
18
17
  Load lineage data from the autogenerated lineage.json file.
19
18
 
20
19
  Returns:
21
- Dict containing the lineage information
20
+ Dict containing the lineage information, or empty dict if file doesn't exist
22
21
 
23
22
  Raises:
24
- FileNotFoundError: If lineage.json doesn't exist
25
23
  json.JSONDecodeError: If lineage.json is malformed
26
24
  """
27
25
  global _lineage_data
@@ -34,151 +32,55 @@ def _load_lineage_data() -> Dict:
34
32
  lineage_file = current_file.parent / "lineage.json"
35
33
 
36
34
  if not lineage_file.exists():
37
- raise FileNotFoundError(f"Lineage file not found: {lineage_file}")
35
+ logger.warning(
36
+ f"Lineage file not found: {lineage_file}. "
37
+ "This may indicate a packaging issue. Lineage detection will be disabled."
38
+ )
39
+ _lineage_data = {}
40
+ return _lineage_data
38
41
 
39
42
  try:
40
43
  with open(lineage_file, "r") as f:
41
44
  _lineage_data = json.load(f)
42
45
  return _lineage_data
43
46
  except json.JSONDecodeError as e:
44
- raise json.JSONDecodeError(
45
- f"Failed to parse lineage.json: {e}", e.doc, e.pos
46
- ) from e
47
-
48
-
49
- def get_lineage_fields(entity_type: str, aspect_name: str) -> List[Dict]:
50
- """
51
- This is experimental internal API subject to breaking changes without prior notice.
52
-
53
- Get lineage fields for a specific entity type and aspect.
54
-
55
- Args:
56
- entity_type: The entity type (e.g., 'dataset', 'dataJob')
57
- aspect_name: The aspect name (e.g., 'upstreamLineage', 'dataJobInputOutput')
58
-
59
- Returns:
60
- List of lineage field dictionaries, each containing:
61
- - name: field name
62
- - path: dot-notation path to the field
63
- - isLineage: boolean indicating if it's lineage
64
- - relationship: relationship information
65
-
66
- Raises:
67
- FileNotFoundError: If lineage.json doesn't exist
68
- json.JSONDecodeError: If lineage.json is malformed
69
- """
70
- lineage_data = _load_lineage_data()
71
-
72
- entity_data = lineage_data.get("entities", {}).get(entity_type, {})
73
- aspect_data = entity_data.get(aspect_name, {})
74
-
75
- return aspect_data.get("fields", [])
76
-
77
-
78
- def is_lineage_field(urn: str, aspect_name: str, field_path: str) -> bool:
79
- """
80
- This is experimental internal API subject to breaking changes without prior notice.
81
-
82
- Check if a specific field path is lineage-related.
83
-
84
- Args:
85
- urn: The entity URN (e.g., 'urn:li:dataset:(urn:li:dataPlatform:mysql,test_db.test_table,PROD)')
86
- aspect_name: The aspect name (e.g., 'upstreamLineage', 'dataJobInputOutput')
87
- field_path: The dot-notation path to the field (e.g., 'upstreams.dataset')
88
-
89
- Returns:
90
- True if the field is lineage-related, False otherwise
91
-
92
- Raises:
93
- FileNotFoundError: If lineage.json doesn't exist
94
- json.JSONDecodeError: If lineage.json is malformed
95
- AssertionError: If URN doesn't start with 'urn:li:'
96
- """
97
- entity_type = guess_entity_type(urn)
98
- lineage_fields = get_lineage_fields(entity_type, aspect_name)
99
-
100
- for field in lineage_fields:
101
- if field.get("path") == field_path:
102
- return field.get("isLineage", False)
103
-
104
- return False
47
+ logger.error(
48
+ f"Failed to parse lineage.json: {e}. Lineage detection will be disabled."
49
+ )
50
+ _lineage_data = {}
51
+ return _lineage_data
105
52
 
106
53
 
107
- def has_lineage(urn: str, aspect: Any) -> bool:
54
+ def _get_fields(entity_type: str, aspect_name: str) -> List[Dict]:
108
55
  """
109
56
  This is experimental internal API subject to breaking changes without prior notice.
110
-
111
- Check if an aspect has any lineage fields.
112
-
113
- Args:
114
- urn: The entity URN (e.g., 'urn:li:dataset:(urn:li:dataPlatform:mysql,test_db.test_table,PROD)')
115
- aspect: The aspect object
116
-
117
- Returns:
118
- True if the aspect has lineage fields, False otherwise
119
-
120
- Raises:
121
- FileNotFoundError: If lineage.json doesn't exist
122
- json.JSONDecodeError: If lineage.json is malformed
123
- AssertionError: If URN doesn't start with 'urn:li:'
124
57
  """
125
- entity_type = guess_entity_type(urn)
126
- aspect_class = getattr(aspect, "__class__", None)
127
- aspect_name = (
128
- aspect_class.__name__ if aspect_class is not None else str(type(aspect))
58
+ return (
59
+ _load_lineage_data()
60
+ .get("entities", {})
61
+ .get(entity_type, {})
62
+ .get(aspect_name, {})
63
+ .get("fields", [])
129
64
  )
130
65
 
131
- lineage_fields = get_lineage_fields(entity_type, aspect_name)
132
- return len(lineage_fields) > 0
133
-
134
66
 
135
- def has_lineage_aspect(entity_type: str, aspect_name: str) -> bool:
67
+ def _get_lineage_fields(entity_type: str, aspect_name: str) -> List[Dict]:
136
68
  """
137
69
  This is experimental internal API subject to breaking changes without prior notice.
138
-
139
- Check if an aspect has any lineage fields.
140
-
141
- Args:
142
- entity_type: The entity type (e.g., 'dataset', 'dataJob')
143
- aspect_name: The aspect name (e.g., 'upstreamLineage', 'dataJobInputOutput')
144
-
145
- Returns:
146
- True if the aspect has lineage fields, False otherwise
147
-
148
- Raises:
149
- FileNotFoundError: If lineage.json doesn't exist
150
- json.JSONDecodeError: If lineage.json is malformed
151
70
  """
152
- lineage_fields = get_lineage_fields(entity_type, aspect_name)
153
- return len(lineage_fields) > 0
71
+ return [
72
+ field
73
+ for field in _get_fields(entity_type, aspect_name)
74
+ if field.get("isLineage", False)
75
+ ]
154
76
 
155
77
 
156
- def get_all_lineage_aspects(entity_type: str) -> Set[str]:
78
+ @lru_cache(maxsize=128)
79
+ def is_lineage_aspect(entity_type: str, aspect_name: str) -> bool:
157
80
  """
158
81
  This is experimental internal API subject to breaking changes without prior notice.
159
-
160
- Get all aspects that have lineage fields for a given entity type.
161
-
162
- Args:
163
- entity_type: The entity type (e.g., 'dataset', 'dataJob')
164
-
165
- Returns:
166
- Set of aspect names that have lineage fields
167
-
168
- Raises:
169
- FileNotFoundError: If lineage.json doesn't exist
170
- json.JSONDecodeError: If lineage.json is malformed
171
82
  """
172
- lineage_data = _load_lineage_data()
173
-
174
- entity_data = lineage_data.get("entities", {}).get(entity_type, {})
175
- lineage_aspects = set()
176
-
177
- for aspect_name, aspect_data in entity_data.items():
178
- if aspect_data.get("fields"):
179
- lineage_aspects.add(aspect_name)
180
-
181
- return lineage_aspects
83
+ return len(_get_lineage_fields(entity_type, aspect_name)) > 0
182
84
 
183
85
 
184
86
  def clear_cache() -> None:
datahub/ingestion/extractor/schema_util.py
@@ -125,7 +125,7 @@ class AvroToMceSchemaConverter:
125
125
  self._prefix_name_stack: PrefixNameStack = [self.version_string]
126
126
  # Tracks the fields on the current path.
127
127
  self._fields_stack: FieldStack = []
128
- # Tracks the record types seen so far. Used to prevent infinite recursion with recursive types.
128
+ # Stack of record types currently being processed. Used to prevent infinite recursion with recursive types.
129
129
  self._record_types_seen: List[str] = []
130
130
  # If part of the key-schema or value-schema.
131
131
  self._is_key_schema = is_key_schema
@@ -522,10 +522,12 @@ class AvroToMceSchemaConverter:
522
522
  # Handle recursive record definitions
523
523
  recurse: bool = True
524
524
  if isinstance(schema, avro.schema.RecordSchema):
525
- if schema.fullname not in self._record_types_seen:
526
- self._record_types_seen.append(schema.fullname)
527
- else:
525
+ # Only prevent recursion if we're currently processing this record type (true recursion)
526
+ # Allow reuse of the same record type in different contexts
527
+ if schema.fullname in self._record_types_seen:
528
528
  recurse = False
529
+ else:
530
+ self._record_types_seen.append(schema.fullname)
529
531
 
530
532
  # Adjust actual schema if needed
531
533
  actual_schema = self._get_underlying_type_if_option_as_union(schema, schema)
@@ -559,6 +561,13 @@ class AvroToMceSchemaConverter:
559
561
  for sub_schema in self._get_sub_schemas(actual_schema):
560
562
  yield from self._to_mce_fields(sub_schema)
561
563
 
564
+ # Clean up the processing stack
565
+ if (
566
+ isinstance(schema, avro.schema.RecordSchema)
567
+ and schema.fullname in self._record_types_seen
568
+ ):
569
+ self._record_types_seen.remove(schema.fullname)
570
+
562
571
  def _gen_non_nested_to_mce_fields(
563
572
  self, schema: SchemaOrField
564
573
  ) -> Iterable[SchemaField]:
datahub/ingestion/graph/client.py
@@ -1576,7 +1576,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1576
1576
  env: str = DEFAULT_ENV,
1577
1577
  default_db: Optional[str] = None,
1578
1578
  default_schema: Optional[str] = None,
1579
- default_dialect: Optional[str] = None,
1579
+ override_dialect: Optional[str] = None,
1580
1580
  ) -> "SqlParsingResult":
1581
1581
  from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
1582
1582
 
@@ -1590,7 +1590,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1590
1590
  schema_resolver=schema_resolver,
1591
1591
  default_db=default_db,
1592
1592
  default_schema=default_schema,
1593
- default_dialect=default_dialect,
1593
+ override_dialect=override_dialect,
1594
1594
  )
1595
1595
 
1596
1596
  def create_tag(self, tag_name: str) -> str: