acryl-datahub 0.15.0.1rc17__py3-none-any.whl → 0.15.0.2rc2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2rc2.dist-info}/METADATA +2451 -2441
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2rc2.dist-info}/RECORD +17 -17
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2rc2.dist-info}/WHEEL +1 -1
- datahub/__init__.py +1 -1
- datahub/cli/cli_utils.py +12 -1
- datahub/emitter/rest_emitter.py +125 -84
- datahub/ingestion/graph/client.py +14 -11
- datahub/ingestion/graph/config.py +1 -1
- datahub/ingestion/source/aws/glue.py +52 -35
- datahub/ingestion/source/bigquery_v2/bigquery.py +2 -0
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +8 -0
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +11 -7
- datahub/ingestion/source/snowflake/snowflake_config.py +8 -0
- datahub/ingestion/source/snowflake/snowflake_v2.py +2 -0
- datahub/utilities/file_backed_collections.py +1 -1
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2rc2.dist-info}/top_level.txt +0 -0
|
@@ -19,8 +19,8 @@ from datahub.utilities.urns._urn_base import Urn
|
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
query listQueries($input: ScrollAcrossEntitiesInput!) {
|
|
22
|
+
QUERY_ENTITIES = """
|
|
23
|
+
query listEntities($input: ScrollAcrossEntitiesInput!) {
|
|
24
24
|
scrollAcrossEntities(input: $input) {
|
|
25
25
|
nextScrollId
|
|
26
26
|
count
|
|
@@ -29,6 +29,9 @@ query listQueries($input: ScrollAcrossEntitiesInput!) {
|
|
|
29
29
|
... on QueryEntity {
|
|
30
30
|
urn
|
|
31
31
|
}
|
|
32
|
+
... on DataProcessInstance {
|
|
33
|
+
urn
|
|
34
|
+
}
|
|
32
35
|
}
|
|
33
36
|
}
|
|
34
37
|
}
|
|
@@ -225,16 +228,16 @@ class SoftDeletedEntitiesCleanup:
|
|
|
225
228
|
time.sleep(self.config.delay)
|
|
226
229
|
return futures
|
|
227
230
|
|
|
228
|
-
def
|
|
231
|
+
def _get_soft_deleted(self, graphql_query: str, entity_type: str) -> Iterable[str]:
|
|
229
232
|
assert self.ctx.graph
|
|
230
233
|
scroll_id: Optional[str] = None
|
|
231
234
|
while True:
|
|
232
235
|
try:
|
|
233
236
|
result = self.ctx.graph.execute_graphql(
|
|
234
|
-
|
|
237
|
+
graphql_query,
|
|
235
238
|
{
|
|
236
239
|
"input": {
|
|
237
|
-
"types": [
|
|
240
|
+
"types": [entity_type],
|
|
238
241
|
"query": "*",
|
|
239
242
|
"scrollId": scroll_id if scroll_id else None,
|
|
240
243
|
"count": self.config.batch_size,
|
|
@@ -254,7 +257,7 @@ class SoftDeletedEntitiesCleanup:
|
|
|
254
257
|
)
|
|
255
258
|
except Exception as e:
|
|
256
259
|
self.report.failure(
|
|
257
|
-
f"While trying to get
|
|
260
|
+
f"While trying to get {entity_type} with {scroll_id}", exc=e
|
|
258
261
|
)
|
|
259
262
|
break
|
|
260
263
|
scroll_across_entities = result.get("scrollAcrossEntities")
|
|
@@ -275,7 +278,8 @@ class SoftDeletedEntitiesCleanup:
|
|
|
275
278
|
status=RemovedStatusFilter.ONLY_SOFT_DELETED,
|
|
276
279
|
batch_size=self.config.batch_size,
|
|
277
280
|
)
|
|
278
|
-
yield from self.
|
|
281
|
+
yield from self._get_soft_deleted(QUERY_ENTITIES, "QUERY")
|
|
282
|
+
yield from self._get_soft_deleted(QUERY_ENTITIES, "DATA_PROCESS_INSTANCE")
|
|
279
283
|
|
|
280
284
|
def _times_up(self) -> bool:
|
|
281
285
|
if (
|
|
@@ -221,6 +221,14 @@ class SnowflakeV2Config(
|
|
|
221
221
|
default=False,
|
|
222
222
|
description="If enabled, uses the new queries extractor to extract queries from snowflake.",
|
|
223
223
|
)
|
|
224
|
+
include_queries: bool = Field(
|
|
225
|
+
default=True,
|
|
226
|
+
description="If enabled, generate query entities associated with lineage edges. Only applicable if `use_queries_v2` is enabled.",
|
|
227
|
+
)
|
|
228
|
+
include_query_usage_statistics: bool = Field(
|
|
229
|
+
default=True,
|
|
230
|
+
description="If enabled, generate query popularity statistics. Only applicable if `use_queries_v2` is enabled.",
|
|
231
|
+
)
|
|
224
232
|
|
|
225
233
|
lazy_schema_resolver: bool = Field(
|
|
226
234
|
default=True,
|
|
@@ -528,6 +528,8 @@ class SnowflakeV2Source(
|
|
|
528
528
|
include_lineage=self.config.include_table_lineage,
|
|
529
529
|
include_usage_statistics=self.config.include_usage_stats,
|
|
530
530
|
include_operations=self.config.include_operational_stats,
|
|
531
|
+
include_queries=self.config.include_queries,
|
|
532
|
+
include_query_usage_statistics=self.config.include_query_usage_statistics,
|
|
531
533
|
user_email_pattern=self.config.user_email_pattern,
|
|
532
534
|
),
|
|
533
535
|
structured_report=self.report,
|
|
@@ -243,7 +243,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
|
|
|
243
243
|
# This was added in 3.24.0 from 2018-06-04.
|
|
244
244
|
# See https://www.sqlite.org/lang_conflict.html
|
|
245
245
|
if OVERRIDE_SQLITE_VERSION_REQUIREMENT:
|
|
246
|
-
self.
|
|
246
|
+
self._use_sqlite_on_conflict = False
|
|
247
247
|
else:
|
|
248
248
|
raise RuntimeError("SQLite version 3.24.0 or later is required")
|
|
249
249
|
|
{acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2rc2.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|