acryl-datahub 0.15.0rc20__py3-none-any.whl → 0.15.0rc22__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/METADATA +2478 -2478
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/RECORD +28 -26
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +56 -68
- datahub/cli/ingest_cli.py +110 -0
- datahub/emitter/rest_emitter.py +17 -4
- datahub/ingestion/sink/datahub_rest.py +12 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +41 -21
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/kafka/kafka_connect.py +81 -51
- datahub/ingestion/source/s3/source.py +2 -3
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_query.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +16 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +23 -0
- datahub/ingestion/source/tableau/tableau.py +42 -3
- datahub/ingestion/source/tableau/tableau_common.py +12 -5
- datahub/ingestion/source/tableau/tableau_constant.py +2 -0
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +33 -0
- datahub/ingestion/source/tableau/tableau_validation.py +48 -0
- datahub/metadata/_schema_classes.py +400 -400
- datahub/metadata/_urns/urn_defs.py +1355 -1355
- datahub/metadata/schema.avsc +17221 -17574
- datahub/testing/compare_metadata_json.py +1 -1
- datahub/utilities/file_backed_collections.py +35 -2
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/top_level.txt +0 -0
|
@@ -117,7 +117,7 @@ def diff_metadata_json(
|
|
|
117
117
|
ignore_paths: Sequence[str] = (),
|
|
118
118
|
ignore_order: bool = True,
|
|
119
119
|
) -> Union[DeepDiff, MCPDiff]:
|
|
120
|
-
ignore_paths =
|
|
120
|
+
ignore_paths = [*ignore_paths, *default_exclude_paths, r"root\[\d+].delta_info"]
|
|
121
121
|
try:
|
|
122
122
|
if ignore_order:
|
|
123
123
|
golden_map = get_aspects_by_urn(golden)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import collections
|
|
2
2
|
import gzip
|
|
3
3
|
import logging
|
|
4
|
+
import os
|
|
4
5
|
import pathlib
|
|
5
6
|
import pickle
|
|
6
7
|
import shutil
|
|
@@ -33,6 +34,14 @@ from datahub.ingestion.api.closeable import Closeable
|
|
|
33
34
|
|
|
34
35
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
35
36
|
|
|
37
|
+
OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR = (
|
|
38
|
+
os.environ.get("OVERRIDE_SQLITE_VERSION_REQ") or ""
|
|
39
|
+
)
|
|
40
|
+
OVERRIDE_SQLITE_VERSION_REQUIREMENT = (
|
|
41
|
+
OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR
|
|
42
|
+
and OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR.lower() != "false"
|
|
43
|
+
)
|
|
44
|
+
|
|
36
45
|
_DEFAULT_FILE_NAME = "sqlite.db"
|
|
37
46
|
_DEFAULT_TABLE_NAME = "data"
|
|
38
47
|
|
|
@@ -212,6 +221,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
|
|
|
212
221
|
_active_object_cache: OrderedDict[str, Tuple[_VT, bool]] = field(
|
|
213
222
|
init=False, repr=False
|
|
214
223
|
)
|
|
224
|
+
_use_sqlite_on_conflict: bool = field(repr=False, default=True)
|
|
215
225
|
|
|
216
226
|
def __post_init__(self) -> None:
|
|
217
227
|
assert (
|
|
@@ -232,7 +242,10 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
|
|
|
232
242
|
# We use the ON CONFLICT clause to implement UPSERTs with sqlite.
|
|
233
243
|
# This was added in 3.24.0 from 2018-06-04.
|
|
234
244
|
# See https://www.sqlite.org/lang_conflict.html
|
|
235
|
-
|
|
245
|
+
if OVERRIDE_SQLITE_VERSION_REQUIREMENT:
|
|
246
|
+
self.use_sqlite_on_conflict = False
|
|
247
|
+
else:
|
|
248
|
+
raise RuntimeError("SQLite version 3.24.0 or later is required")
|
|
236
249
|
|
|
237
250
|
# We keep a small cache in memory to avoid having to serialize/deserialize
|
|
238
251
|
# data from the database too often. We use an OrderedDict to build
|
|
@@ -295,7 +308,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
|
|
|
295
308
|
values.append(column_serializer(value))
|
|
296
309
|
items_to_write.append(tuple(values))
|
|
297
310
|
|
|
298
|
-
if items_to_write:
|
|
311
|
+
if items_to_write and self._use_sqlite_on_conflict:
|
|
299
312
|
# Tricky: By using a INSERT INTO ... ON CONFLICT (key) structure, we can
|
|
300
313
|
# ensure that the rowid remains the same if a value is updated but is
|
|
301
314
|
# autoincremented when rows are inserted.
|
|
@@ -312,6 +325,26 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
|
|
|
312
325
|
""",
|
|
313
326
|
items_to_write,
|
|
314
327
|
)
|
|
328
|
+
else:
|
|
329
|
+
for item in items_to_write:
|
|
330
|
+
try:
|
|
331
|
+
self._conn.execute(
|
|
332
|
+
f"""INSERT INTO {self.tablename} (
|
|
333
|
+
key,
|
|
334
|
+
value
|
|
335
|
+
{''.join(f', {column_name}' for column_name in self.extra_columns.keys())}
|
|
336
|
+
)
|
|
337
|
+
VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))})""",
|
|
338
|
+
item,
|
|
339
|
+
)
|
|
340
|
+
except sqlite3.IntegrityError:
|
|
341
|
+
self._conn.execute(
|
|
342
|
+
f"""UPDATE {self.tablename} SET
|
|
343
|
+
value = ?
|
|
344
|
+
{''.join(f', {column_name} = ?' for column_name in self.extra_columns.keys())}
|
|
345
|
+
WHERE key = ?""",
|
|
346
|
+
(*item[1:], item[0]),
|
|
347
|
+
)
|
|
315
348
|
|
|
316
349
|
def flush(self) -> None:
|
|
317
350
|
self._prune_cache(len(self._active_object_cache))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|