acryl-datahub 1.1.0.5rc7__py3-none-any.whl → 1.1.0.5rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/METADATA +2620 -2622
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/RECORD +59 -59
- datahub/_version.py +1 -1
- datahub/cli/check_cli.py +0 -7
- datahub/cli/cli_utils.py +73 -0
- datahub/cli/delete_cli.py +0 -6
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +148 -228
- datahub/cli/exists_cli.py +0 -4
- datahub/cli/get_cli.py +0 -4
- datahub/cli/ingest_cli.py +1 -20
- datahub/cli/put_cli.py +0 -6
- datahub/cli/quickstart_versioning.py +50 -5
- datahub/cli/specific/assertions_cli.py +0 -6
- datahub/cli/specific/datacontract_cli.py +0 -6
- datahub/cli/specific/dataproduct_cli.py +0 -22
- datahub/cli/specific/dataset_cli.py +0 -11
- datahub/cli/specific/forms_cli.py +0 -6
- datahub/cli/specific/group_cli.py +0 -4
- datahub/cli/specific/structuredproperties_cli.py +0 -7
- datahub/cli/specific/user_cli.py +0 -4
- datahub/cli/state_cli.py +0 -4
- datahub/cli/timeline_cli.py +0 -4
- datahub/entrypoints.py +4 -3
- datahub/ingestion/autogenerated/capability_summary.json +88 -23
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/graph/client.py +2 -2
- datahub/ingestion/run/pipeline.py +43 -0
- datahub/ingestion/source/bigquery_v2/bigquery.py +9 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dremio/dremio_source.py +1 -4
- datahub/ingestion/source/gcs/gcs_source.py +9 -1
- datahub/ingestion/source/identity/okta.py +0 -13
- datahub/ingestion/source/powerbi/powerbi.py +0 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/sigma/sigma.py +6 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +11 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +100 -58
- datahub/ingestion/source/snowflake/snowflake_v2.py +11 -1
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +1 -1
- datahub/ingestion/source/sql/hive_metastore.py +0 -10
- datahub/ingestion/source/sql/sql_common.py +8 -0
- datahub/ingestion/source/sql/teradata.py +993 -234
- datahub/ingestion/source/sql/vertica.py +0 -4
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/superset.py +56 -1
- datahub/ingestion/source/tableau/tableau.py +40 -34
- datahub/ingestion/source/tableau/tableau_constant.py +0 -2
- datahub/ingestion/source/unity/source.py +9 -1
- datahub/sdk/lineage_client.py +2 -2
- datahub/sql_parsing/sql_parsing_aggregator.py +21 -12
- datahub/sql_parsing/sqlglot_lineage.py +40 -15
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"generated_at": "2025-07-
|
|
2
|
+
"generated_at": "2025-07-11T05:33:33.512319+00:00",
|
|
3
3
|
"generated_by": "metadata-ingestion/scripts/capability_summary.py",
|
|
4
4
|
"plugin_details": {
|
|
5
5
|
"abs": {
|
|
@@ -33,7 +33,10 @@
|
|
|
33
33
|
{
|
|
34
34
|
"capability": "CONTAINERS",
|
|
35
35
|
"description": "Enabled by default",
|
|
36
|
-
"subtype_modifier":
|
|
36
|
+
"subtype_modifier": [
|
|
37
|
+
"Database",
|
|
38
|
+
"Schema"
|
|
39
|
+
],
|
|
37
40
|
"supported": true
|
|
38
41
|
},
|
|
39
42
|
{
|
|
@@ -127,7 +130,10 @@
|
|
|
127
130
|
{
|
|
128
131
|
"capability": "CONTAINERS",
|
|
129
132
|
"description": "Enabled by default",
|
|
130
|
-
"subtype_modifier":
|
|
133
|
+
"subtype_modifier": [
|
|
134
|
+
"Project",
|
|
135
|
+
"Dataset"
|
|
136
|
+
],
|
|
131
137
|
"supported": true
|
|
132
138
|
},
|
|
133
139
|
{
|
|
@@ -245,7 +251,10 @@
|
|
|
245
251
|
{
|
|
246
252
|
"capability": "CONTAINERS",
|
|
247
253
|
"description": "Enabled by default",
|
|
248
|
-
"subtype_modifier":
|
|
254
|
+
"subtype_modifier": [
|
|
255
|
+
"Database",
|
|
256
|
+
"Schema"
|
|
257
|
+
],
|
|
249
258
|
"supported": true
|
|
250
259
|
},
|
|
251
260
|
{
|
|
@@ -343,7 +352,10 @@
|
|
|
343
352
|
{
|
|
344
353
|
"capability": "CONTAINERS",
|
|
345
354
|
"description": "Enabled by default",
|
|
346
|
-
"subtype_modifier":
|
|
355
|
+
"subtype_modifier": [
|
|
356
|
+
"Database",
|
|
357
|
+
"Schema"
|
|
358
|
+
],
|
|
347
359
|
"supported": true
|
|
348
360
|
},
|
|
349
361
|
{
|
|
@@ -675,7 +687,10 @@
|
|
|
675
687
|
{
|
|
676
688
|
"capability": "CONTAINERS",
|
|
677
689
|
"description": "Enabled by default",
|
|
678
|
-
"subtype_modifier":
|
|
690
|
+
"subtype_modifier": [
|
|
691
|
+
"Database",
|
|
692
|
+
"Schema"
|
|
693
|
+
],
|
|
679
694
|
"supported": true
|
|
680
695
|
},
|
|
681
696
|
{
|
|
@@ -871,7 +886,10 @@
|
|
|
871
886
|
{
|
|
872
887
|
"capability": "CONTAINERS",
|
|
873
888
|
"description": "Enabled by default",
|
|
874
|
-
"subtype_modifier":
|
|
889
|
+
"subtype_modifier": [
|
|
890
|
+
"GCS bucket",
|
|
891
|
+
"Folder"
|
|
892
|
+
],
|
|
875
893
|
"supported": true
|
|
876
894
|
},
|
|
877
895
|
{
|
|
@@ -955,7 +973,10 @@
|
|
|
955
973
|
{
|
|
956
974
|
"capability": "CONTAINERS",
|
|
957
975
|
"description": "Enabled by default",
|
|
958
|
-
"subtype_modifier":
|
|
976
|
+
"subtype_modifier": [
|
|
977
|
+
"Database",
|
|
978
|
+
"Schema"
|
|
979
|
+
],
|
|
959
980
|
"supported": true
|
|
960
981
|
},
|
|
961
982
|
{
|
|
@@ -1071,7 +1092,10 @@
|
|
|
1071
1092
|
{
|
|
1072
1093
|
"capability": "CONTAINERS",
|
|
1073
1094
|
"description": "Enabled by default",
|
|
1074
|
-
"subtype_modifier":
|
|
1095
|
+
"subtype_modifier": [
|
|
1096
|
+
"Database",
|
|
1097
|
+
"Schema"
|
|
1098
|
+
],
|
|
1075
1099
|
"supported": true
|
|
1076
1100
|
},
|
|
1077
1101
|
{
|
|
@@ -1143,7 +1167,10 @@
|
|
|
1143
1167
|
{
|
|
1144
1168
|
"capability": "CONTAINERS",
|
|
1145
1169
|
"description": "Enabled by default",
|
|
1146
|
-
"subtype_modifier":
|
|
1170
|
+
"subtype_modifier": [
|
|
1171
|
+
"Database",
|
|
1172
|
+
"Schema"
|
|
1173
|
+
],
|
|
1147
1174
|
"supported": true
|
|
1148
1175
|
},
|
|
1149
1176
|
{
|
|
@@ -1497,7 +1524,10 @@
|
|
|
1497
1524
|
{
|
|
1498
1525
|
"capability": "CONTAINERS",
|
|
1499
1526
|
"description": "Enabled by default",
|
|
1500
|
-
"subtype_modifier":
|
|
1527
|
+
"subtype_modifier": [
|
|
1528
|
+
"Database",
|
|
1529
|
+
"Schema"
|
|
1530
|
+
],
|
|
1501
1531
|
"supported": true
|
|
1502
1532
|
},
|
|
1503
1533
|
{
|
|
@@ -1703,7 +1733,10 @@
|
|
|
1703
1733
|
{
|
|
1704
1734
|
"capability": "CONTAINERS",
|
|
1705
1735
|
"description": "Enabled by default",
|
|
1706
|
-
"subtype_modifier":
|
|
1736
|
+
"subtype_modifier": [
|
|
1737
|
+
"Database",
|
|
1738
|
+
"Schema"
|
|
1739
|
+
],
|
|
1707
1740
|
"supported": true
|
|
1708
1741
|
},
|
|
1709
1742
|
{
|
|
@@ -1783,7 +1816,10 @@
|
|
|
1783
1816
|
{
|
|
1784
1817
|
"capability": "CONTAINERS",
|
|
1785
1818
|
"description": "Enabled by default",
|
|
1786
|
-
"subtype_modifier":
|
|
1819
|
+
"subtype_modifier": [
|
|
1820
|
+
"Database",
|
|
1821
|
+
"Schema"
|
|
1822
|
+
],
|
|
1787
1823
|
"supported": true
|
|
1788
1824
|
},
|
|
1789
1825
|
{
|
|
@@ -1935,7 +1971,10 @@
|
|
|
1935
1971
|
{
|
|
1936
1972
|
"capability": "CONTAINERS",
|
|
1937
1973
|
"description": "Enabled by default",
|
|
1938
|
-
"subtype_modifier":
|
|
1974
|
+
"subtype_modifier": [
|
|
1975
|
+
"Database",
|
|
1976
|
+
"Schema"
|
|
1977
|
+
],
|
|
1939
1978
|
"supported": true
|
|
1940
1979
|
},
|
|
1941
1980
|
{
|
|
@@ -2001,7 +2040,10 @@
|
|
|
2001
2040
|
{
|
|
2002
2041
|
"capability": "CONTAINERS",
|
|
2003
2042
|
"description": "Enabled by default",
|
|
2004
|
-
"subtype_modifier":
|
|
2043
|
+
"subtype_modifier": [
|
|
2044
|
+
"Database",
|
|
2045
|
+
"Schema"
|
|
2046
|
+
],
|
|
2005
2047
|
"supported": true
|
|
2006
2048
|
},
|
|
2007
2049
|
{
|
|
@@ -2199,7 +2241,10 @@
|
|
|
2199
2241
|
{
|
|
2200
2242
|
"capability": "CONTAINERS",
|
|
2201
2243
|
"description": "Enabled by default",
|
|
2202
|
-
"subtype_modifier":
|
|
2244
|
+
"subtype_modifier": [
|
|
2245
|
+
"Database",
|
|
2246
|
+
"Schema"
|
|
2247
|
+
],
|
|
2203
2248
|
"supported": true
|
|
2204
2249
|
},
|
|
2205
2250
|
{
|
|
@@ -2271,7 +2316,10 @@
|
|
|
2271
2316
|
{
|
|
2272
2317
|
"capability": "CONTAINERS",
|
|
2273
2318
|
"description": "Enabled by default",
|
|
2274
|
-
"subtype_modifier":
|
|
2319
|
+
"subtype_modifier": [
|
|
2320
|
+
"Database",
|
|
2321
|
+
"Schema"
|
|
2322
|
+
],
|
|
2275
2323
|
"supported": true
|
|
2276
2324
|
},
|
|
2277
2325
|
{
|
|
@@ -2677,7 +2725,9 @@
|
|
|
2677
2725
|
{
|
|
2678
2726
|
"capability": "CONTAINERS",
|
|
2679
2727
|
"description": "Enabled by default",
|
|
2680
|
-
"subtype_modifier":
|
|
2728
|
+
"subtype_modifier": [
|
|
2729
|
+
"Sigma Workspace"
|
|
2730
|
+
],
|
|
2681
2731
|
"supported": true
|
|
2682
2732
|
},
|
|
2683
2733
|
{
|
|
@@ -2753,7 +2803,10 @@
|
|
|
2753
2803
|
{
|
|
2754
2804
|
"capability": "CONTAINERS",
|
|
2755
2805
|
"description": "Enabled by default",
|
|
2756
|
-
"subtype_modifier":
|
|
2806
|
+
"subtype_modifier": [
|
|
2807
|
+
"Database",
|
|
2808
|
+
"Schema"
|
|
2809
|
+
],
|
|
2757
2810
|
"supported": true
|
|
2758
2811
|
},
|
|
2759
2812
|
{
|
|
@@ -2859,7 +2912,10 @@
|
|
|
2859
2912
|
{
|
|
2860
2913
|
"capability": "CONTAINERS",
|
|
2861
2914
|
"description": "Enabled by default",
|
|
2862
|
-
"subtype_modifier":
|
|
2915
|
+
"subtype_modifier": [
|
|
2916
|
+
"Database",
|
|
2917
|
+
"Schema"
|
|
2918
|
+
],
|
|
2863
2919
|
"supported": true
|
|
2864
2920
|
},
|
|
2865
2921
|
{
|
|
@@ -3122,7 +3178,10 @@
|
|
|
3122
3178
|
{
|
|
3123
3179
|
"capability": "CONTAINERS",
|
|
3124
3180
|
"description": "Enabled by default",
|
|
3125
|
-
"subtype_modifier":
|
|
3181
|
+
"subtype_modifier": [
|
|
3182
|
+
"Database",
|
|
3183
|
+
"Schema"
|
|
3184
|
+
],
|
|
3126
3185
|
"supported": true
|
|
3127
3186
|
},
|
|
3128
3187
|
{
|
|
@@ -3194,7 +3253,10 @@
|
|
|
3194
3253
|
{
|
|
3195
3254
|
"capability": "CONTAINERS",
|
|
3196
3255
|
"description": "Enabled by default",
|
|
3197
|
-
"subtype_modifier":
|
|
3256
|
+
"subtype_modifier": [
|
|
3257
|
+
"Catalog",
|
|
3258
|
+
"Schema"
|
|
3259
|
+
],
|
|
3198
3260
|
"supported": true
|
|
3199
3261
|
},
|
|
3200
3262
|
{
|
|
@@ -3288,7 +3350,10 @@
|
|
|
3288
3350
|
{
|
|
3289
3351
|
"capability": "CONTAINERS",
|
|
3290
3352
|
"description": "Enabled by default",
|
|
3291
|
-
"subtype_modifier":
|
|
3353
|
+
"subtype_modifier": [
|
|
3354
|
+
"Database",
|
|
3355
|
+
"Schema"
|
|
3356
|
+
],
|
|
3292
3357
|
"supported": true
|
|
3293
3358
|
},
|
|
3294
3359
|
{
|
|
@@ -125,7 +125,7 @@ class AvroToMceSchemaConverter:
|
|
|
125
125
|
self._prefix_name_stack: PrefixNameStack = [self.version_string]
|
|
126
126
|
# Tracks the fields on the current path.
|
|
127
127
|
self._fields_stack: FieldStack = []
|
|
128
|
-
#
|
|
128
|
+
# Stack of record types currently being processed. Used to prevent infinite recursion with recursive types.
|
|
129
129
|
self._record_types_seen: List[str] = []
|
|
130
130
|
# If part of the key-schema or value-schema.
|
|
131
131
|
self._is_key_schema = is_key_schema
|
|
@@ -522,10 +522,12 @@ class AvroToMceSchemaConverter:
|
|
|
522
522
|
# Handle recursive record definitions
|
|
523
523
|
recurse: bool = True
|
|
524
524
|
if isinstance(schema, avro.schema.RecordSchema):
|
|
525
|
-
if
|
|
526
|
-
|
|
527
|
-
|
|
525
|
+
# Only prevent recursion if we're currently processing this record type (true recursion)
|
|
526
|
+
# Allow reuse of the same record type in different contexts
|
|
527
|
+
if schema.fullname in self._record_types_seen:
|
|
528
528
|
recurse = False
|
|
529
|
+
else:
|
|
530
|
+
self._record_types_seen.append(schema.fullname)
|
|
529
531
|
|
|
530
532
|
# Adjust actual schema if needed
|
|
531
533
|
actual_schema = self._get_underlying_type_if_option_as_union(schema, schema)
|
|
@@ -559,6 +561,13 @@ class AvroToMceSchemaConverter:
|
|
|
559
561
|
for sub_schema in self._get_sub_schemas(actual_schema):
|
|
560
562
|
yield from self._to_mce_fields(sub_schema)
|
|
561
563
|
|
|
564
|
+
# Clean up the processing stack
|
|
565
|
+
if (
|
|
566
|
+
isinstance(schema, avro.schema.RecordSchema)
|
|
567
|
+
and schema.fullname in self._record_types_seen
|
|
568
|
+
):
|
|
569
|
+
self._record_types_seen.remove(schema.fullname)
|
|
570
|
+
|
|
562
571
|
def _gen_non_nested_to_mce_fields(
|
|
563
572
|
self, schema: SchemaOrField
|
|
564
573
|
) -> Iterable[SchemaField]:
|
|
@@ -1576,7 +1576,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1576
1576
|
env: str = DEFAULT_ENV,
|
|
1577
1577
|
default_db: Optional[str] = None,
|
|
1578
1578
|
default_schema: Optional[str] = None,
|
|
1579
|
-
|
|
1579
|
+
override_dialect: Optional[str] = None,
|
|
1580
1580
|
) -> "SqlParsingResult":
|
|
1581
1581
|
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
|
|
1582
1582
|
|
|
@@ -1590,7 +1590,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1590
1590
|
schema_resolver=schema_resolver,
|
|
1591
1591
|
default_db=default_db,
|
|
1592
1592
|
default_schema=default_schema,
|
|
1593
|
-
|
|
1593
|
+
override_dialect=override_dialect,
|
|
1594
1594
|
)
|
|
1595
1595
|
|
|
1596
1596
|
def create_tag(self, tag_name: str) -> str:
|
|
@@ -44,6 +44,10 @@ from datahub.ingestion.transformer.transform_registry import transform_registry
|
|
|
44
44
|
from datahub.sdk._attribution import KnownAttribution, change_default_attribution
|
|
45
45
|
from datahub.telemetry import stats
|
|
46
46
|
from datahub.telemetry.telemetry import telemetry_instance
|
|
47
|
+
from datahub.upgrade.upgrade import (
|
|
48
|
+
is_server_default_cli_ahead,
|
|
49
|
+
retrieve_version_stats,
|
|
50
|
+
)
|
|
47
51
|
from datahub.utilities._custom_package_loader import model_version_name
|
|
48
52
|
from datahub.utilities.global_warning_util import (
|
|
49
53
|
clear_global_warnings,
|
|
@@ -343,6 +347,44 @@ class Pipeline:
|
|
|
343
347
|
except Exception as e:
|
|
344
348
|
logger.warning("Reporting failed on start", exc_info=e)
|
|
345
349
|
|
|
350
|
+
def _warn_old_cli_version(self) -> None:
|
|
351
|
+
"""
|
|
352
|
+
Check if the server default CLI version is ahead of the CLI version being used.
|
|
353
|
+
If so, add a warning to the report.
|
|
354
|
+
"""
|
|
355
|
+
|
|
356
|
+
try:
|
|
357
|
+
version_stats = retrieve_version_stats(timeout=2.0, graph=self.graph)
|
|
358
|
+
except RuntimeError as e:
|
|
359
|
+
# Handle case where there's no event loop available (e.g., in ThreadPoolExecutor)
|
|
360
|
+
if "no current event loop" in str(e):
|
|
361
|
+
logger.debug("Skipping version check - no event loop available")
|
|
362
|
+
return
|
|
363
|
+
raise
|
|
364
|
+
|
|
365
|
+
if not version_stats or not self.graph:
|
|
366
|
+
return
|
|
367
|
+
|
|
368
|
+
if is_server_default_cli_ahead(version_stats):
|
|
369
|
+
server_default_version = (
|
|
370
|
+
version_stats.server.current_server_default_cli_version.version
|
|
371
|
+
if version_stats.server.current_server_default_cli_version
|
|
372
|
+
else None
|
|
373
|
+
)
|
|
374
|
+
current_version = version_stats.client.current.version
|
|
375
|
+
|
|
376
|
+
logger.debug(f"""
|
|
377
|
+
client_version: {current_version}
|
|
378
|
+
server_default_version: {server_default_version}
|
|
379
|
+
server_default_cli_ahead: True
|
|
380
|
+
""")
|
|
381
|
+
|
|
382
|
+
self.source.get_report().warning(
|
|
383
|
+
title="Server default CLI version is ahead of CLI version",
|
|
384
|
+
message="Please upgrade the CLI version being used",
|
|
385
|
+
context=f"Server Default CLI version: {server_default_version}, Used CLI version: {current_version}",
|
|
386
|
+
)
|
|
387
|
+
|
|
346
388
|
def _notify_reporters_on_ingestion_completion(self) -> None:
|
|
347
389
|
for reporter in self.reporters:
|
|
348
390
|
try:
|
|
@@ -399,6 +441,7 @@ class Pipeline:
|
|
|
399
441
|
return False
|
|
400
442
|
|
|
401
443
|
def run(self) -> None:
|
|
444
|
+
self._warn_old_cli_version()
|
|
402
445
|
with self.exit_stack, self.inner_exit_stack:
|
|
403
446
|
if self.config.flags.generate_memory_profiles:
|
|
404
447
|
import memray
|
|
@@ -45,6 +45,7 @@ from datahub.ingestion.source.bigquery_v2.queries_extractor import (
|
|
|
45
45
|
BigQueryQueriesExtractorConfig,
|
|
46
46
|
)
|
|
47
47
|
from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor
|
|
48
|
+
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
|
48
49
|
from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler
|
|
49
50
|
from datahub.ingestion.source.state.redundant_run_skip_handler import (
|
|
50
51
|
RedundantLineageRunSkipHandler,
|
|
@@ -78,7 +79,14 @@ def cleanup(config: BigQueryV2Config) -> None:
|
|
|
78
79
|
supported=False,
|
|
79
80
|
)
|
|
80
81
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
81
|
-
@capability(
|
|
82
|
+
@capability(
|
|
83
|
+
SourceCapability.CONTAINERS,
|
|
84
|
+
"Enabled by default",
|
|
85
|
+
subtype_modifier=[
|
|
86
|
+
SourceCapabilityModifier.BIGQUERY_PROJECT,
|
|
87
|
+
SourceCapabilityModifier.BIGQUERY_DATASET,
|
|
88
|
+
],
|
|
89
|
+
)
|
|
82
90
|
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
|
|
83
91
|
@capability(
|
|
84
92
|
SourceCapability.DATA_PROFILING,
|
|
@@ -12,7 +12,7 @@ from datahub.emitter.serialization_helper import post_json_transform
|
|
|
12
12
|
from datahub.ingestion.source.datahub.config import DataHubSourceConfig
|
|
13
13
|
from datahub.ingestion.source.datahub.report import DataHubSourceReport
|
|
14
14
|
from datahub.ingestion.source.sql.sql_config import SQLAlchemyConnectionConfig
|
|
15
|
-
from datahub.metadata.schema_classes import
|
|
15
|
+
from datahub.metadata.schema_classes import SystemMetadataClass
|
|
16
16
|
from datahub.utilities.lossy_collections import LossyDict, LossyList
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
@@ -374,7 +374,6 @@ class DataHubDatabaseReader:
|
|
|
374
374
|
entityUrn=row["urn"],
|
|
375
375
|
aspect=ASPECT_MAP[row["aspect"]].from_obj(json_aspect),
|
|
376
376
|
systemMetadata=system_metadata,
|
|
377
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
378
377
|
)
|
|
379
378
|
except Exception as e:
|
|
380
379
|
logger.warning(
|
|
@@ -61,7 +61,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
|
|
61
61
|
UpstreamClass,
|
|
62
62
|
UpstreamLineage,
|
|
63
63
|
)
|
|
64
|
-
from datahub.metadata.schema_classes import
|
|
64
|
+
from datahub.metadata.schema_classes import SchemaMetadataClass
|
|
65
65
|
from datahub.metadata.urns import CorpUserUrn
|
|
66
66
|
from datahub.sql_parsing.sql_parsing_aggregator import (
|
|
67
67
|
KnownQueryLineageInfo,
|
|
@@ -433,11 +433,8 @@ class DremioSource(StatefulIngestionSourceBase):
|
|
|
433
433
|
]
|
|
434
434
|
)
|
|
435
435
|
mcp = MetadataChangeProposalWrapper(
|
|
436
|
-
entityType="dataset",
|
|
437
436
|
entityUrn=dataset_urn,
|
|
438
|
-
aspectName=lineage.ASPECT_NAME,
|
|
439
437
|
aspect=lineage,
|
|
440
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
441
438
|
)
|
|
442
439
|
|
|
443
440
|
for upstream_urn in upstream_urns:
|
|
@@ -16,6 +16,7 @@ from datahub.ingestion.api.decorators import (
|
|
|
16
16
|
from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceCapability
|
|
17
17
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
18
18
|
from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig
|
|
19
|
+
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
|
19
20
|
from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin
|
|
20
21
|
from datahub.ingestion.source.data_lake_common.data_lake_utils import PLATFORM_GCS
|
|
21
22
|
from datahub.ingestion.source.data_lake_common.object_store import (
|
|
@@ -82,7 +83,14 @@ class GCSSourceReport(DataLakeSourceReport):
|
|
|
82
83
|
@platform_name("Google Cloud Storage", id=PLATFORM_GCS)
|
|
83
84
|
@config_class(GCSSourceConfig)
|
|
84
85
|
@support_status(SupportStatus.INCUBATING)
|
|
85
|
-
@capability(
|
|
86
|
+
@capability(
|
|
87
|
+
SourceCapability.CONTAINERS,
|
|
88
|
+
"Enabled by default",
|
|
89
|
+
subtype_modifier=[
|
|
90
|
+
SourceCapabilityModifier.GCS_BUCKET,
|
|
91
|
+
SourceCapabilityModifier.FOLDER,
|
|
92
|
+
],
|
|
93
|
+
)
|
|
86
94
|
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
|
|
87
95
|
@capability(SourceCapability.DATA_PROFILING, "Not supported", supported=False)
|
|
88
96
|
class GCSSource(StatefulIngestionSourceBase):
|
|
@@ -41,7 +41,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
|
|
|
41
41
|
)
|
|
42
42
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
43
43
|
from datahub.metadata.schema_classes import (
|
|
44
|
-
ChangeTypeClass,
|
|
45
44
|
CorpGroupInfoClass,
|
|
46
45
|
CorpUserInfoClass,
|
|
47
46
|
GroupMembershipClass,
|
|
@@ -332,18 +331,12 @@ class OktaSource(StatefulIngestionSourceBase):
|
|
|
332
331
|
yield MetadataWorkUnit(id=wu_id, mce=mce)
|
|
333
332
|
|
|
334
333
|
yield MetadataChangeProposalWrapper(
|
|
335
|
-
entityType="corpGroup",
|
|
336
334
|
entityUrn=datahub_corp_group_snapshot.urn,
|
|
337
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
338
|
-
aspectName="origin",
|
|
339
335
|
aspect=OriginClass(OriginTypeClass.EXTERNAL, "OKTA"),
|
|
340
336
|
).as_workunit()
|
|
341
337
|
|
|
342
338
|
yield MetadataChangeProposalWrapper(
|
|
343
|
-
entityType="corpGroup",
|
|
344
339
|
entityUrn=datahub_corp_group_snapshot.urn,
|
|
345
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
346
|
-
aspectName="status",
|
|
347
340
|
aspect=StatusClass(removed=False),
|
|
348
341
|
).as_workunit()
|
|
349
342
|
|
|
@@ -418,18 +411,12 @@ class OktaSource(StatefulIngestionSourceBase):
|
|
|
418
411
|
yield MetadataWorkUnit(id=wu_id, mce=mce)
|
|
419
412
|
|
|
420
413
|
yield MetadataChangeProposalWrapper(
|
|
421
|
-
entityType="corpuser",
|
|
422
414
|
entityUrn=datahub_corp_user_snapshot.urn,
|
|
423
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
424
|
-
aspectName="origin",
|
|
425
415
|
aspect=OriginClass(OriginTypeClass.EXTERNAL, "OKTA"),
|
|
426
416
|
).as_workunit()
|
|
427
417
|
|
|
428
418
|
yield MetadataChangeProposalWrapper(
|
|
429
|
-
entityType="corpuser",
|
|
430
419
|
entityUrn=datahub_corp_user_snapshot.urn,
|
|
431
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
432
|
-
aspectName="status",
|
|
433
420
|
aspect=StatusClass(removed=False),
|
|
434
421
|
).as_workunit()
|
|
435
422
|
|
|
@@ -294,8 +294,6 @@ class Mapper:
|
|
|
294
294
|
logger.debug(f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage}")
|
|
295
295
|
|
|
296
296
|
mcp = MetadataChangeProposalWrapper(
|
|
297
|
-
entityType=Constant.DATASET,
|
|
298
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
299
297
|
entityUrn=ds_urn,
|
|
300
298
|
aspect=upstream_lineage_class,
|
|
301
299
|
)
|
|
@@ -538,9 +536,7 @@ class Mapper:
|
|
|
538
536
|
profile.columnCount = table.column_count
|
|
539
537
|
|
|
540
538
|
mcp = MetadataChangeProposalWrapper(
|
|
541
|
-
entityType="dataset",
|
|
542
539
|
entityUrn=ds_urn,
|
|
543
|
-
aspectName="datasetProfile",
|
|
544
540
|
aspect=profile,
|
|
545
541
|
)
|
|
546
542
|
dataset_mcps.append(mcp)
|
|
@@ -796,7 +792,6 @@ class Mapper:
|
|
|
796
792
|
guid=container_key.guid(),
|
|
797
793
|
)
|
|
798
794
|
mcp = MetadataChangeProposalWrapper(
|
|
799
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
800
795
|
entityUrn=entity_urn,
|
|
801
796
|
aspect=ContainerClass(container=f"{container_urn}"),
|
|
802
797
|
)
|
|
@@ -673,7 +673,6 @@ class PowerBiAPI:
|
|
|
673
673
|
fill_dashboard_tags()
|
|
674
674
|
self._fill_independent_datasets(workspace=workspace)
|
|
675
675
|
|
|
676
|
-
# flake8: noqa: C901
|
|
677
676
|
def fill_workspaces(
|
|
678
677
|
self, workspaces: List[Workspace], reporter: PowerBiDashboardSourceReport
|
|
679
678
|
) -> Iterable[Workspace]:
|
|
@@ -52,7 +52,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
52
52
|
from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
|
|
53
53
|
from datahub.metadata.schema_classes import (
|
|
54
54
|
BrowsePathsClass,
|
|
55
|
-
ChangeTypeClass,
|
|
56
55
|
CorpUserInfoClass,
|
|
57
56
|
CorpUserKeyClass,
|
|
58
57
|
DashboardInfoClass,
|
|
@@ -243,20 +242,14 @@ class Mapper:
|
|
|
243
242
|
|
|
244
243
|
@staticmethod
|
|
245
244
|
def new_mcp(
|
|
246
|
-
entity_type,
|
|
247
245
|
entity_urn,
|
|
248
|
-
aspect_name,
|
|
249
246
|
aspect,
|
|
250
|
-
change_type=ChangeTypeClass.UPSERT,
|
|
251
247
|
):
|
|
252
248
|
"""
|
|
253
249
|
Create MCP
|
|
254
250
|
"""
|
|
255
251
|
return MetadataChangeProposalWrapper(
|
|
256
|
-
entityType=entity_type,
|
|
257
|
-
changeType=change_type,
|
|
258
252
|
entityUrn=entity_urn,
|
|
259
|
-
aspectName=aspect_name,
|
|
260
253
|
aspect=aspect,
|
|
261
254
|
)
|
|
262
255
|
|
|
@@ -343,17 +336,13 @@ class Mapper:
|
|
|
343
336
|
)
|
|
344
337
|
|
|
345
338
|
info_mcp = self.new_mcp(
|
|
346
|
-
entity_type=Constant.DASHBOARD,
|
|
347
339
|
entity_urn=dashboard_urn,
|
|
348
|
-
aspect_name=Constant.DASHBOARD_INFO,
|
|
349
340
|
aspect=dashboard_info_cls,
|
|
350
341
|
)
|
|
351
342
|
|
|
352
343
|
# removed status mcp
|
|
353
344
|
removed_status_mcp = self.new_mcp(
|
|
354
|
-
entity_type=Constant.DASHBOARD,
|
|
355
345
|
entity_urn=dashboard_urn,
|
|
356
|
-
aspect_name=Constant.STATUS,
|
|
357
346
|
aspect=StatusClass(removed=False),
|
|
358
347
|
)
|
|
359
348
|
|
|
@@ -365,9 +354,7 @@ class Mapper:
|
|
|
365
354
|
|
|
366
355
|
# Dashboard key
|
|
367
356
|
dashboard_key_mcp = self.new_mcp(
|
|
368
|
-
entity_type=Constant.DASHBOARD,
|
|
369
357
|
entity_urn=dashboard_urn,
|
|
370
|
-
aspect_name=Constant.DASHBOARD_KEY,
|
|
371
358
|
aspect=dashboard_key_cls,
|
|
372
359
|
)
|
|
373
360
|
|
|
@@ -378,9 +365,7 @@ class Mapper:
|
|
|
378
365
|
ownership = OwnershipClass(owners=owners)
|
|
379
366
|
# Dashboard owner MCP
|
|
380
367
|
owner_mcp = self.new_mcp(
|
|
381
|
-
entity_type=Constant.DASHBOARD,
|
|
382
368
|
entity_urn=dashboard_urn,
|
|
383
|
-
aspect_name=Constant.OWNERSHIP,
|
|
384
369
|
aspect=ownership,
|
|
385
370
|
)
|
|
386
371
|
|
|
@@ -396,9 +381,7 @@ class Mapper:
|
|
|
396
381
|
]
|
|
397
382
|
)
|
|
398
383
|
browse_path_mcp = self.new_mcp(
|
|
399
|
-
entity_type=Constant.DASHBOARD,
|
|
400
384
|
entity_urn=dashboard_urn,
|
|
401
|
-
aspect_name=Constant.BROWSERPATH,
|
|
402
385
|
aspect=browse_path,
|
|
403
386
|
)
|
|
404
387
|
|
|
@@ -429,27 +412,21 @@ class Mapper:
|
|
|
429
412
|
)
|
|
430
413
|
|
|
431
414
|
info_mcp = self.new_mcp(
|
|
432
|
-
entity_type=Constant.CORP_USER,
|
|
433
415
|
entity_urn=user_urn,
|
|
434
|
-
aspect_name=Constant.CORP_USER_INFO,
|
|
435
416
|
aspect=user_info_instance,
|
|
436
417
|
)
|
|
437
418
|
user_mcps.append(info_mcp)
|
|
438
419
|
|
|
439
420
|
# removed status mcp
|
|
440
421
|
status_mcp = self.new_mcp(
|
|
441
|
-
entity_type=Constant.CORP_USER,
|
|
442
422
|
entity_urn=user_urn,
|
|
443
|
-
aspect_name=Constant.STATUS,
|
|
444
423
|
aspect=StatusClass(removed=False),
|
|
445
424
|
)
|
|
446
425
|
user_mcps.append(status_mcp)
|
|
447
426
|
user_key = CorpUserKeyClass(username=user.username)
|
|
448
427
|
|
|
449
428
|
user_key_mcp = self.new_mcp(
|
|
450
|
-
entity_type=Constant.CORP_USER,
|
|
451
429
|
entity_urn=user_urn,
|
|
452
|
-
aspect_name=Constant.CORP_USER_KEY,
|
|
453
430
|
aspect=user_key,
|
|
454
431
|
)
|
|
455
432
|
user_mcps.append(user_key_mcp)
|
|
@@ -30,6 +30,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
30
30
|
from datahub.ingestion.source.common.subtypes import (
|
|
31
31
|
BIContainerSubTypes,
|
|
32
32
|
DatasetSubTypes,
|
|
33
|
+
SourceCapabilityModifier,
|
|
33
34
|
)
|
|
34
35
|
from datahub.ingestion.source.sigma.config import (
|
|
35
36
|
PlatformDetail,
|
|
@@ -95,7 +96,11 @@ logger = logging.getLogger(__name__)
|
|
|
95
96
|
@platform_name("Sigma")
|
|
96
97
|
@config_class(SigmaSourceConfig)
|
|
97
98
|
@support_status(SupportStatus.INCUBATING)
|
|
98
|
-
@capability(
|
|
99
|
+
@capability(
|
|
100
|
+
SourceCapability.CONTAINERS,
|
|
101
|
+
"Enabled by default",
|
|
102
|
+
subtype_modifier=[SourceCapabilityModifier.SIGMA_WORKSPACE],
|
|
103
|
+
)
|
|
99
104
|
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
|
|
100
105
|
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default.")
|
|
101
106
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|