acryl-datahub 1.1.0.4rc3__py3-none-any.whl → 1.1.0.5rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc2.dist-info}/METADATA +2518 -2518
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc2.dist-info}/RECORD +41 -36
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc2.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/ingestion/api/decorators.py +1 -0
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/run/pipeline.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +12 -0
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +389 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_config.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +42 -31
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +15 -15
- datahub/ingestion/source/sql/vertica.py +1 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +1 -1
- datahub/ingestion/source/unity/source.py +1 -1
- datahub/metadata/_internal_schema_classes.py +3 -0
- datahub/metadata/schema.avsc +2 -0
- datahub/metadata/schemas/ContainerProperties.avsc +2 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +2 -0
- datahub/metadata/schemas/DataJobInfo.avsc +2 -0
- datahub/metadata/schemas/DataProcessKey.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +2 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -0
- datahub/metadata/schemas/MLModelKey.avsc +2 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +2 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +5 -2
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5rc2.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/snowflake/snowflake_queries.py
CHANGED

@@ -63,7 +63,10 @@ from datahub.sql_parsing.sqlglot_lineage import (
     DownstreamColumnRef,
 )
 from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
-from datahub.utilities.file_backed_collections import
+from datahub.utilities.file_backed_collections import (
+    ConnectionWrapper,
+    FileBackedList,
+)
 from datahub.utilities.perf_timer import PerfTimer
 
 logger = logging.getLogger(__name__)
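The widened import supports the shared-connection refactor in the hunks below. As a rough sketch of how these utilities compose (the temporary path is invented for illustration; the constructor and append/iterate usage match the diff):

import pathlib
import tempfile

from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedList

# Entries are spilled to a SQLite file instead of being held in memory.
db_file = pathlib.Path(tempfile.mkdtemp()) / "audit_log.sqlite"

with ConnectionWrapper(db_file) as shared_connection:
    queries = FileBackedList(shared_connection)
    queries.append("SELECT 1")
    queries.append("SELECT 2")
    for q in queries:
        print(q)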
@@ -243,6 +246,12 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
         audit_log_file = self.local_temp_path / "audit_log.sqlite"
         use_cached_audit_log = audit_log_file.exists()
 
+        if self.config.local_temp_path is None:
+            self._exit_stack.callback(lambda: audit_log_file.unlink(missing_ok=True))
+
+        shared_connection = self._exit_stack.enter_context(
+            ConnectionWrapper(audit_log_file)
+        )
         queries: FileBackedList[
             Union[
                 KnownLineageMapping,
@@ -251,27 +260,16 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
                 TableSwap,
                 ObservedQuery,
             ]
-        ]
+        ] = self._exit_stack.enter_context(FileBackedList(shared_connection))
+
         if use_cached_audit_log:
-            logger.info("Using cached audit log")
-            shared_connection = ConnectionWrapper(audit_log_file)
-            queries = FileBackedList(shared_connection)
+            logger.info(f"Using cached audit log at {audit_log_file}")
         else:
-
-
-            shared_connection = ConnectionWrapper(audit_log_file)
-            queries = FileBackedList(shared_connection)
-            entry: Union[
-                KnownLineageMapping,
-                PreparsedQuery,
-                TableRename,
-                TableSwap,
-                ObservedQuery,
-            ]
+            logger.info(f"Fetching audit log into {audit_log_file}")
 
         with self.report.copy_history_fetch_timer:
-            for
-            queries.append(
+            for copy_entry in self.fetch_copy_history():
+                queries.append(copy_entry)
 
         with self.report.query_log_fetch_timer:
             for entry in self.fetch_query_log(users):
@@ -281,13 +279,10 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
         for i, query in enumerate(queries):
             if i % 1000 == 0:
                 logger.info(f"Added {i} query log entries to SQL aggregator")
+
             self.aggregator.add(query)
 
         yield from auto_workunit(self.aggregator.gen_metadata())
-        if not use_cached_audit_log:
-            queries.close()
-            shared_connection.close()
-            audit_log_file.unlink(missing_ok=True)
 
     def fetch_users(self) -> UsersMapping:
         users: UsersMapping = dict()
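Net effect of the hunks above: the SQLite-backed audit log is always created through the extractor's _exit_stack, the "cached" and "fresh" branches collapse to log messages, and cleanup no longer depends on the generator running to completion. A minimal sketch of the underlying contextlib pattern, with invented names:

import contextlib
import pathlib
import tempfile

class SketchExtractor:
    """Stand-in for SnowflakeQueriesExtractor's cleanup wiring."""

    def __init__(self, keep_temp_file: bool) -> None:
        self._exit_stack = contextlib.ExitStack()
        self.db_file = pathlib.Path(tempfile.mkdtemp()) / "audit_log.sqlite"
        if not keep_temp_file:
            # Registered first, so it runs last: the file is unlinked only
            # after everything that uses it has been closed.
            self._exit_stack.callback(lambda: self.db_file.unlink(missing_ok=True))
        # enter_context() ties the resource's lifetime to the stack.
        self.handle = self._exit_stack.enter_context(open(self.db_file, "w"))

    def close(self) -> None:
        # Unwinds in LIFO order: close the handle, then unlink the file.
        self._exit_stack.close()

ex = SketchExtractor(keep_temp_file=False)
ex.close()
assert not ex.db_file.exists()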
@@ -403,8 +398,9 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
 
         # TODO need to map snowflake query types to ours
         query_text: str = res["query_text"]
+        snowflake_query_type: str = res["query_type"]
         query_type: QueryType = SNOWFLAKE_QUERY_TYPE_MAPPING.get(
-
+            snowflake_query_type, QueryType.UNKNOWN
         )
 
         direct_objects_accessed = res["direct_objects_accessed"]
@@ -421,7 +417,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
             res["session_id"],
             timestamp,
             object_modified_by_ddl,
-
+            snowflake_query_type,
         )
         if known_ddl_entry:
             return known_ddl_entry
@@ -436,6 +432,16 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
                 res["user_name"], users.get(res["user_name"])
             )
         )
+        extra_info = {
+            "snowflake_query_id": res["query_id"],
+            "snowflake_root_query_id": res["root_query_id"],
+            "snowflake_query_type": res["query_type"],
+            "snowflake_role_name": res["role_name"],
+            "query_duration": res["query_duration"],
+            "rows_inserted": res["rows_inserted"],
+            "rows_updated": res["rows_updated"],
+            "rows_deleted": res["rows_deleted"],
+        }
 
         # There are a couple cases when we'd want to prefer our own SQL parsing
         # over Snowflake's metadata.
@@ -470,6 +476,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
             query_hash=get_query_fingerprint(
                 query_text, self.identifiers.platform, fast=True
             ),
+            extra_info=extra_info,
         )
 
         upstreams = []
@@ -556,6 +563,7 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
             timestamp=timestamp,
             session_id=res["session_id"],
             query_type=query_type,
+            extra_info=extra_info,
         )
         return entry
 
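The extra_info dict assembled above rides along on the parsed query objects and, per the sql_parsing_aggregator.py hunks at the end of this diff, is copied onto QueryMetadata without being interpreted; it is purely a debugging payload. A simplified sketch of that hand-off, using stand-in dataclasses rather than the real ones:

import dataclasses
from typing import Optional

@dataclasses.dataclass
class PreparsedQuery:  # stand-in for the real dataclass
    query_text: str
    extra_info: Optional[dict] = None

@dataclasses.dataclass
class QueryMetadata:  # stand-in for the real dataclass
    query_text: str
    extra_info: Optional[dict] = None

def add_preparsed_query(parsed: PreparsedQuery) -> QueryMetadata:
    # Mirrors the aggregator change: the dict passes through unchanged.
    return QueryMetadata(query_text=parsed.query_text, extra_info=parsed.extra_info)

meta = add_preparsed_query(
    PreparsedQuery("SELECT 1", extra_info={"snowflake_query_id": "01b2c3"})
)
assert meta.extra_info == {"snowflake_query_id": "01b2c3"}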
@@ -667,7 +675,7 @@ def _build_enriched_query_log_query(
     start_time_millis = int(start_time.timestamp() * 1000)
     end_time_millis = int(end_time.timestamp() * 1000)
 
-    users_filter = ""
+    users_filter = "TRUE"
     if deny_usernames:
         user_not_in = ",".join(f"'{user.upper()}'" for user in deny_usernames)
         users_filter = f"user_name NOT IN ({user_not_in})"
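Since the SQL templates below now interpolate {users_filter} unconditionally, an empty-string default would leave a dangling AND; defaulting to "TRUE" keeps the predicate a valid no-op. Illustratively:

def render_filter(deny_usernames: list) -> str:
    users_filter = "TRUE"
    if deny_usernames:
        user_not_in = ",".join(f"'{user.upper()}'" for user in deny_usernames)
        users_filter = f"user_name NOT IN ({user_not_in})"
    return f"AND {users_filter}"

print(render_filter([]))           # AND TRUE
print(render_filter(["svc_etl"]))  # AND user_name NOT IN ('SVC_ETL')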
@@ -694,10 +702,10 @@ fingerprinted_queries as (
     FROM
         snowflake.account_usage.query_history
     WHERE
-        query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3)
-        AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3)
+        query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3) -- {start_time.isoformat()}
+        AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3) -- {end_time.isoformat()}
         AND execution_status = 'SUCCESS'
-        AND {users_filter
+        AND {users_filter}
 )
 , deduplicated_queries as (
     SELECT
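The inline -- comments cost nothing at execution time but make a captured copy of the rendered SQL self-describing, since epoch-millisecond arguments are unreadable on their own:

from datetime import datetime, timezone

start_time = datetime(2025, 1, 1, tzinfo=timezone.utc)
start_time_millis = int(start_time.timestamp() * 1000)
print(
    f"query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3)"
    f" -- {start_time.isoformat()}"
)
# query_history.start_time >= to_timestamp_ltz(1735689600000, 3) -- 2025-01-01T00:00:00+00:00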
@@ -715,6 +723,7 @@ fingerprinted_queries as (
 , raw_access_history AS (
     SELECT
         query_id,
+        root_query_id,
         query_start_time,
         user_name,
         direct_objects_accessed,
@@ -723,9 +732,9 @@ fingerprinted_queries as (
     FROM
         snowflake.account_usage.access_history
     WHERE
-        query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
-        AND query_start_time < to_timestamp_ltz({end_time_millis}, 3)
-        AND {users_filter
+        query_start_time >= to_timestamp_ltz({start_time_millis}, 3) -- {start_time.isoformat()}
+        AND query_start_time < to_timestamp_ltz({end_time_millis}, 3) -- {end_time.isoformat()}
+        AND {users_filter}
         AND query_id IN (
             SELECT query_id FROM deduplicated_queries
         )
@@ -734,6 +743,7 @@ fingerprinted_queries as (
     -- TODO: Add table filter clause.
     SELECT
         query_id,
+        root_query_id,
         query_start_time,
         ARRAY_SLICE(
             FILTER(direct_objects_accessed, o -> o:objectDomain IN {SnowflakeQuery.ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER}),
@@ -764,6 +774,7 @@ fingerprinted_queries as (
     q.rows_deleted AS "ROWS_DELETED",
     q.user_name AS "USER_NAME",
     q.role_name AS "ROLE_NAME",
+    a.root_query_id,
     a.direct_objects_accessed,
     a.objects_modified,
     a.object_modified_by_ddl
datahub/ingestion/source/snowflake/snowflake_v2.py
CHANGED

@@ -118,7 +118,7 @@ logger: logging.Logger = logging.getLogger(__name__)
 )
 @capability(
     SourceCapability.DELETION_DETECTION,
-    "
+    "Enabled by default via stateful ingestion",
     supported=True,
 )
 @capability(
datahub/ingestion/source/sql/mssql/source.py
CHANGED

@@ -936,25 +936,25 @@ class SQLServerSource(SQLAlchemySource):
         url = self.config.get_sql_alchemy_url()
         logger.debug(f"sql_alchemy_url={url}")
         engine = create_engine(url, **self.config.options)
-
-
-
-
-
+
+        if self.config.database and self.config.database != "":
+            inspector = inspect(engine)
+            yield inspector
+        else:
+            with engine.begin() as conn:
                 databases = conn.execute(
                     "SELECT name FROM master.sys.databases WHERE name NOT IN \
                     ('master', 'model', 'msdb', 'tempdb', 'Resource', \
                     'distribution' , 'reportserver', 'reportservertempdb'); "
-                )
-
-
-
-
-
-
-
-
-            yield inspector
+                ).fetchall()
+
+            for db in databases:
+                if self.config.database_pattern.allowed(db["name"]):
+                    url = self.config.get_sql_alchemy_url(current_db=db["name"])
+                    engine = create_engine(url, **self.config.options)
+                    inspector = inspect(engine)
+                    self.current_database = db["name"]
+                    yield inspector
 
     def get_identifier(
         self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any
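Two things change in the rewrite above: the single-database case now short-circuits before any master.sys.databases query, and the database list is materialized with .fetchall() while the connection is still open, so the loop can build a fresh engine per database afterwards. A reduced sketch of the multi-database branch (connection URL invented; key-based row access matches the diff):

from sqlalchemy import create_engine, inspect

base_url = "mssql+pytds://user:password@host:1433"  # illustrative only
engine = create_engine(f"{base_url}/master")

with engine.begin() as conn:
    # fetchall() pulls every row up front, so no live cursor is needed below.
    databases = conn.execute("SELECT name FROM master.sys.databases").fetchall()

for db in databases:
    db_engine = create_engine(f"{base_url}/{db['name']}")
    inspector = inspect(db_engine)
    print(db["name"], inspector.get_schema_names())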
datahub/ingestion/source/sql/vertica.py
CHANGED

@@ -116,7 +116,7 @@ class VerticaConfig(BasicSQLAlchemyConfig):
 )
 @capability(
     SourceCapability.DELETION_DETECTION,
-    "
+    "Enabled by default via stateful ingestion",
     supported=True,
 )
 class VerticaSource(SQLAlchemySource):
datahub/ingestion/source/state/stateful_ingestion_base.py
CHANGED

@@ -179,7 +179,7 @@ class StatefulIngestionReport(SourceReport):
 
 @capability(
     SourceCapability.DELETION_DETECTION,
-    "
+    "Enabled by default via stateful ingestion",
     supported=True,
 )
 class StatefulIngestionSourceBase(Source):
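These one-line fixes fill in DELETION_DETECTION descriptions that previously rendered as empty strings in the generated source docs. For context, @capability is a class decorator that records capability metadata on the source class; a simplified sketch of the mechanism (not the exact DataHub implementation):

from dataclasses import dataclass
from typing import Callable, Dict

@dataclass
class CapabilitySetting:
    capability: str
    description: str
    supported: bool

def capability(cap: str, description: str, supported: bool = True) -> Callable[[type], type]:
    def wrapper(cls: type) -> type:
        caps: Dict[str, CapabilitySetting] = dict(getattr(cls, "_capabilities", {}))
        caps[cap] = CapabilitySetting(cap, description, supported)
        cls._capabilities = caps
        return cls
    return wrapper

@capability("DELETION_DETECTION", "Enabled by default via stateful ingestion")
class MySource:
    pass

print(MySource._capabilities["DELETION_DETECTION"].description)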
datahub/ingestion/source/superset.py
CHANGED

@@ -272,7 +272,7 @@ def get_filter_name(filter_obj):
 @config_class(SupersetConfig)
 @support_status(SupportStatus.CERTIFIED)
 @capability(
-    SourceCapability.DELETION_DETECTION, "
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
 )
 @capability(SourceCapability.DOMAINS, "Enabled by `domain` config to assign domain_key")
 @capability(SourceCapability.LINEAGE_COARSE, "Supported by default")
datahub/ingestion/source/unity/source.py
CHANGED

@@ -159,7 +159,7 @@ logger: logging.Logger = logging.getLogger(__name__)
 )
 @capability(
     SourceCapability.DELETION_DETECTION,
-    "
+    "Enabled by default via stateful ingestion",
     supported=True,
 )
 @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
datahub/metadata/schema.avsc
CHANGED

@@ -9508,6 +9508,7 @@
         "QA": "Designates quality assurance fabrics",
         "RVW": "Designates review fabrics",
         "SANDBOX": "Designates sandbox fabrics",
+        "SBX": "Alternative spelling for sandbox",
         "SIT": "System Integration Testing",
         "STG": "Designates staging fabrics",
         "TEST": "Designates testing fabrics",

@@ -9531,6 +9532,7 @@
         "PRD",
         "TST",
         "SIT",
+        "SBX",
         "SANDBOX"
       ],
       "doc": "Fabric group type"
The same pair of hunks — the "SBX" doc entry plus the "SBX" enum symbol — repeats in each of the remaining *.avsc schema files listed above (ContainerProperties through MetadataChangeEvent); only the line offsets differ:

@@ -99,6 +99,7 @@
         "QA": "Designates quality assurance fabrics",
         "RVW": "Designates review fabrics",
         "SANDBOX": "Designates sandbox fabrics",
+        "SBX": "Alternative spelling for sandbox",
         "SIT": "System Integration Testing",
         "STG": "Designates staging fabrics",
         "TEST": "Designates testing fabrics",

@@ -122,6 +123,7 @@
         "PRD",
         "TST",
         "SIT",
+        "SBX",
         "SANDBOX"
       ],
       "doc": "Fabric group type"

The remaining hunk header pairs, in file order: @@ -153,6 +153,7 @@ with @@ -176,6 +177,7 @@; @@ -219,6 +219,7 @@ with @@ -242,6 +243,7 @@; @@ -52,6 +52,7 @@ with @@ -75,6 +76,7 @@; @@ -89,6 +89,7 @@ with @@ -112,6 +113,7 @@; @@ -64,6 +64,7 @@ with @@ -87,6 +88,7 @@; @@ -60,6 +60,7 @@ with @@ -83,6 +84,7 @@; @@ -67,6 +67,7 @@ with @@ -90,6 +91,7 @@; @@ -81,6 +81,7 @@ with @@ -104,6 +105,7 @@; and @@ -2430,6 +2430,7 @@ with @@ -2453,6 +2454,7 @@ (MetadataChangeEvent.avsc).
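With the new value generated into the Python classes (_internal_schema_classes.py, +3 lines above), datasets can be stamped with the SBX fabric. Hypothetical usage via the emitter helpers:

from datahub.emitter.mce_builder import make_dataset_urn
from datahub.metadata.schema_classes import FabricTypeClass

# FabricTypeClass.SBX is the new "SBX" symbol added in this release.
urn = make_dataset_urn(platform="snowflake", name="db.schema.table", env=FabricTypeClass.SBX)
print(urn)  # urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,SBX)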
datahub/sql_parsing/sql_parsing_aggregator.py
CHANGED

@@ -58,6 +58,7 @@ from datahub.sql_parsing.tool_meta_extractor import (
     ToolMetaExtractorReport,
 )
 from datahub.utilities.cooperative_timeout import CooperativeTimeoutError
+from datahub.utilities.dedup_list import deduplicate_list
 from datahub.utilities.file_backed_collections import (
     ConnectionWrapper,
     FileBackedDict,
@@ -140,6 +141,7 @@ class QueryMetadata:
 
     used_temp_tables: bool = True
 
+    extra_info: Optional[dict] = None
     origin: Optional[Urn] = None
 
     def make_created_audit_stamp(self) -> models.AuditStampClass:
@@ -263,7 +265,7 @@ class PreparsedQuery:
     query_type_props: QueryTypeProps = dataclasses.field(
         default_factory=lambda: QueryTypeProps()
     )
-    # Use this to store
+    # Use this to store additional key-value information about the query for debugging.
     extra_info: Optional[dict] = None
     origin: Optional[Urn] = None
 
@@ -948,6 +950,7 @@ class SqlParsingAggregator(Closeable):
                 column_usage=parsed.column_usage or {},
                 confidence_score=parsed.confidence_score,
                 used_temp_tables=session_has_temp_tables,
+                extra_info=parsed.extra_info,
                 origin=parsed.origin,
             )
         )
@@ -1706,7 +1709,7 @@ class SqlParsingAggregator(Closeable):
         )
 
         merged_query_text = ";\n\n".join(
-            [q.formatted_query_string for q in ordered_queries]
+            deduplicate_list([q.formatted_query_string for q in ordered_queries])
         )
 
         resolved_query = dataclasses.replace(
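The dedup fix matters when temp-table resolution stitches several session queries together: the merged query text previously contained each repeated statement verbatim. deduplicate_list presumably keeps first-seen order; an equivalent order-preserving dedup:

from typing import Iterable, List, TypeVar

T = TypeVar("T")

def dedup(items: Iterable[T]) -> List[T]:
    # dict preserves insertion order, so duplicates collapse to the first occurrence.
    return list(dict.fromkeys(items))

queries = [
    "CREATE TEMP TABLE t AS SELECT 1",
    "INSERT INTO x SELECT * FROM t",
    "CREATE TEMP TABLE t AS SELECT 1",
]
print(";\n\n".join(dedup(queries)))  # the duplicate CREATE appears only once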