castor-extractor 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +8 -0
- castor_extractor/uploader/upload.py +1 -1
- castor_extractor/utils/client/api.py +7 -2
- castor_extractor/warehouse/databricks/client.py +17 -2
- castor_extractor/warehouse/databricks/format.py +22 -3
- castor_extractor/warehouse/databricks/format_test.py +35 -1
- {castor_extractor-0.17.0.dist-info → castor_extractor-0.17.2.dist-info}/METADATA +1 -1
- {castor_extractor-0.17.0.dist-info → castor_extractor-0.17.2.dist-info}/RECORD +11 -11
- {castor_extractor-0.17.0.dist-info → castor_extractor-0.17.2.dist-info}/LICENCE +0 -0
- {castor_extractor-0.17.0.dist-info → castor_extractor-0.17.2.dist-info}/WHEEL +0 -0
- {castor_extractor-0.17.0.dist-info → castor_extractor-0.17.2.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -58,7 +58,12 @@ class APIClient:
|
|
|
58
58
|
|
|
59
59
|
return result.json()
|
|
60
60
|
|
|
61
|
-
def get(
|
|
61
|
+
def get(
|
|
62
|
+
self,
|
|
63
|
+
path: str,
|
|
64
|
+
payload: Optional[dict] = None,
|
|
65
|
+
processor: Optional[Callable] = None,
|
|
66
|
+
) -> dict:
|
|
62
67
|
"""path: REST API operation path, such as /api/2.0/clusters/get"""
|
|
63
68
|
url = self.build_url(self._host, path)
|
|
64
|
-
return self._call(url=url, data=payload)
|
|
69
|
+
return self._call(url=url, data=payload, processor=processor)
|
|
@@ -5,6 +5,7 @@ from functools import partial
|
|
|
5
5
|
from typing import Any, Dict, List, Optional, Set, Tuple, cast
|
|
6
6
|
|
|
7
7
|
import requests
|
|
8
|
+
from requests import Response
|
|
8
9
|
|
|
9
10
|
from ...utils import (
|
|
10
11
|
SafeMode,
|
|
@@ -30,6 +31,7 @@ _RETRY_BASE_MS = 1000
|
|
|
30
31
|
_RETRY_EXCEPTIONS = [
|
|
31
32
|
requests.exceptions.ConnectTimeout,
|
|
32
33
|
]
|
|
34
|
+
_WORKSPACE_ID_HEADER = "X-Databricks-Org-Id"
|
|
33
35
|
|
|
34
36
|
safe_params = SafeMode((BaseException,), _MAX_NUMBER_OF_LINEAGE_ERRORS)
|
|
35
37
|
|
|
@@ -120,15 +122,28 @@ class DatabricksClient(APIClient):
|
|
|
120
122
|
for schema in self._schemas_of_database(database)
|
|
121
123
|
]
|
|
122
124
|
|
|
125
|
+
@staticmethod
|
|
126
|
+
def _process_table_response(response: Response) -> Tuple[dict, str]:
|
|
127
|
+
"""
|
|
128
|
+
Returns both the JSON content and the Workspace ID, which is found
|
|
129
|
+
in the response's headers.
|
|
130
|
+
"""
|
|
131
|
+
return response.json(), response.headers[_WORKSPACE_ID_HEADER]
|
|
132
|
+
|
|
123
133
|
def _tables_columns_of_schema(self, schema: dict) -> TablesColumns:
|
|
124
134
|
path = "api/2.1/unity-catalog/tables"
|
|
125
135
|
payload = {
|
|
126
136
|
"catalog_name": schema["database_id"],
|
|
127
137
|
"schema_name": schema["schema_name"],
|
|
128
138
|
}
|
|
129
|
-
content = self.get(
|
|
139
|
+
content, workspace_id = self.get(
|
|
140
|
+
path=path,
|
|
141
|
+
payload=payload,
|
|
142
|
+
processor=self._process_table_response,
|
|
143
|
+
)
|
|
144
|
+
host = self.build_url(self._host, path="")
|
|
130
145
|
return self.formatter.format_table_column(
|
|
131
|
-
content.get("tables", []), schema
|
|
146
|
+
content.get("tables", []), schema, host, workspace_id
|
|
132
147
|
)
|
|
133
148
|
|
|
134
149
|
@staticmethod
|
|
@@ -9,6 +9,8 @@ logger = logging.getLogger(__name__)
|
|
|
9
9
|
EXCLUDED_DATABASES = {"system"}
|
|
10
10
|
EXCLUDED_SCHEMAS = {"information_schema", "default"}
|
|
11
11
|
|
|
12
|
+
TABLE_URL_TPL = "{host}explore/data/{catalog_name}/{schema_name}/{table_name}?o={workspace_id}"
|
|
13
|
+
|
|
12
14
|
|
|
13
15
|
def _to_datetime_or_none(time_ms: Optional[int]) -> Optional[datetime]:
|
|
14
16
|
"""return time in ms as datetime or None"""
|
|
@@ -17,7 +19,23 @@ def _to_datetime_or_none(time_ms: Optional[int]) -> Optional[datetime]:
|
|
|
17
19
|
return datetime.fromtimestamp(time_ms / 1000.0)
|
|
18
20
|
|
|
19
21
|
|
|
20
|
-
def _table_payload(
|
|
22
|
+
def _table_payload(
|
|
23
|
+
schema: dict,
|
|
24
|
+
table: dict,
|
|
25
|
+
host: str,
|
|
26
|
+
workspace_id: str,
|
|
27
|
+
) -> dict:
|
|
28
|
+
"""
|
|
29
|
+
Prepares the table payload. This also includes a source link which is built
|
|
30
|
+
here using the host and workspace_id.
|
|
31
|
+
"""
|
|
32
|
+
url = TABLE_URL_TPL.format(
|
|
33
|
+
host=host,
|
|
34
|
+
catalog_name=table["catalog_name"],
|
|
35
|
+
schema_name=table["schema_name"],
|
|
36
|
+
table_name=table["name"],
|
|
37
|
+
workspace_id=workspace_id,
|
|
38
|
+
)
|
|
21
39
|
return {
|
|
22
40
|
"description": table.get("comment"),
|
|
23
41
|
"id": table["table_id"],
|
|
@@ -26,6 +44,7 @@ def _table_payload(schema: dict, table: dict) -> dict:
|
|
|
26
44
|
"table_name": table["name"],
|
|
27
45
|
"tags": [],
|
|
28
46
|
"type": table.get("table_type"),
|
|
47
|
+
"url": url,
|
|
29
48
|
}
|
|
30
49
|
|
|
31
50
|
|
|
@@ -78,14 +97,14 @@ class DatabricksFormatter:
|
|
|
78
97
|
|
|
79
98
|
@staticmethod
|
|
80
99
|
def format_table_column(
|
|
81
|
-
raw_tables: List[dict], schema: dict
|
|
100
|
+
raw_tables: List[dict], schema: dict, host: str, workspace_id: str
|
|
82
101
|
) -> TablesColumns:
|
|
83
102
|
tables = []
|
|
84
103
|
columns = []
|
|
85
104
|
if not raw_tables:
|
|
86
105
|
return [], []
|
|
87
106
|
for table in raw_tables:
|
|
88
|
-
t = _table_payload(schema, table)
|
|
107
|
+
t = _table_payload(schema, table, host, workspace_id)
|
|
89
108
|
tables.append(t)
|
|
90
109
|
if not table.get("columns"):
|
|
91
110
|
continue
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
|
|
3
|
-
from .format import
|
|
3
|
+
from .format import (
|
|
4
|
+
DatabricksFormatter,
|
|
5
|
+
_column_payload,
|
|
6
|
+
_table_payload,
|
|
7
|
+
_to_datetime_or_none,
|
|
8
|
+
)
|
|
4
9
|
|
|
5
10
|
|
|
6
11
|
def test__to_datetime_or_none():
|
|
@@ -23,6 +28,35 @@ def test_DatabricksFormatter__primary():
|
|
|
23
28
|
assert DatabricksFormatter._primary([]) is None
|
|
24
29
|
|
|
25
30
|
|
|
31
|
+
def test__table_payload():
|
|
32
|
+
schema = {"id": "id123"}
|
|
33
|
+
|
|
34
|
+
table = {
|
|
35
|
+
"name": "baz",
|
|
36
|
+
"catalog_name": "foo",
|
|
37
|
+
"schema_name": "bar",
|
|
38
|
+
"table_type": "MANAGED",
|
|
39
|
+
"owner": "pot@ato.com",
|
|
40
|
+
"table_id": "732pot5e-8ato-4c27-b701-9fa51febc192",
|
|
41
|
+
}
|
|
42
|
+
host = "https://some.cloud.databricks.net/"
|
|
43
|
+
workspace_id = "123456"
|
|
44
|
+
|
|
45
|
+
payload = _table_payload(schema, table, host, workspace_id)
|
|
46
|
+
|
|
47
|
+
expected = {
|
|
48
|
+
"description": None,
|
|
49
|
+
"id": "732pot5e-8ato-4c27-b701-9fa51febc192",
|
|
50
|
+
"owner_email": "pot@ato.com",
|
|
51
|
+
"schema_id": "id123",
|
|
52
|
+
"table_name": "baz",
|
|
53
|
+
"tags": [],
|
|
54
|
+
"type": "MANAGED",
|
|
55
|
+
"url": "https://some.cloud.databricks.net/explore/data/foo/bar/baz?o=123456",
|
|
56
|
+
}
|
|
57
|
+
assert payload == expected
|
|
58
|
+
|
|
59
|
+
|
|
26
60
|
def test__column_payload():
|
|
27
61
|
table = {
|
|
28
62
|
"id": "18175cd5-9b9b-4d78-9d28-caaa12c21ce0",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=qR-os6GsyPBBkE3MwBCB4nYlMULY-D0vqKLXDcamyMU,11078
|
|
2
2
|
Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
|
|
3
3
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
4
4
|
README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
|
|
@@ -41,13 +41,13 @@ castor_extractor/uploader/__init__.py,sha256=SSRtwjg-dNoxME-RJy9G1flASiUKAC5bH1h
|
|
|
41
41
|
castor_extractor/uploader/constant.py,sha256=yTigLHDlYwoRr6CpFIl7ReElFsQd4H-qkluMZJPWSx0,865
|
|
42
42
|
castor_extractor/uploader/env.py,sha256=5HSniVSOYVg4u38O4k8TB_qaJq9s8yJ1hjedkq_gdVg,878
|
|
43
43
|
castor_extractor/uploader/env_test.py,sha256=ClCWWtwd2N-5ClIDUxVMeKkWfhhOTxpppsXUDmdjxSg,472
|
|
44
|
-
castor_extractor/uploader/upload.py,sha256=
|
|
44
|
+
castor_extractor/uploader/upload.py,sha256=W1TGqO8_PtFdR661qNlh6v-LOIRvoJoda65-5OujFXs,3239
|
|
45
45
|
castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
|
|
46
46
|
castor_extractor/uploader/utils.py,sha256=Tx_i875L2vJ8btOLV3-L0UMEFiyhH8E5n0XXRyLjO0Y,793
|
|
47
47
|
castor_extractor/utils/__init__.py,sha256=bmzAOc-PKsVreMJtF7DGpPQeHrVqxWel_BblRftt6Ag,1186
|
|
48
48
|
castor_extractor/utils/client/__init__.py,sha256=CRE-xJKm6fVV9dB8ljzB5YoOxX4I1sCD1KSgqs3Y8_Y,161
|
|
49
49
|
castor_extractor/utils/client/abstract.py,sha256=aA5Qcb9TwWDSMq8WpXbGkOB20hehwX2VTpqQAwV76wk,2048
|
|
50
|
-
castor_extractor/utils/client/api.py,sha256=
|
|
50
|
+
castor_extractor/utils/client/api.py,sha256=AGDj2JH__Q_x7RQdodoVazGvjGQJ9TzNqs-XEX6Hrms,1840
|
|
51
51
|
castor_extractor/utils/client/api_test.py,sha256=NSMdXg1FLc37erqHp2FZsIsogWVv6lFSs7rDXHikr-E,542
|
|
52
52
|
castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9mMqL71nyygo0,866
|
|
53
53
|
castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
|
|
@@ -277,12 +277,12 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
|
|
|
277
277
|
castor_extractor/warehouse/bigquery/query.py,sha256=hrFfjd5jW2oQnZ6ozlkn-gDe6sCIzu5zSX19T9W6fIk,4162
|
|
278
278
|
castor_extractor/warehouse/bigquery/types.py,sha256=LZVWSmE57lOemNbB5hBRyYmDk9bFAU4nbRaJWALl6N8,140
|
|
279
279
|
castor_extractor/warehouse/databricks/__init__.py,sha256=bTvDxjGQGM2J3hOnVhfNmFP1y8DK0tySiD_EXe5_xWE,200
|
|
280
|
-
castor_extractor/warehouse/databricks/client.py,sha256=
|
|
280
|
+
castor_extractor/warehouse/databricks/client.py,sha256=sNY-7FDg9nLaqw2zk_aoGAhSGi8KST9QpHAHn46700w,17439
|
|
281
281
|
castor_extractor/warehouse/databricks/client_test.py,sha256=KNp4Hi_CC6GwiW2QDJQQwqALfUebuT9D_qL6FuP_8tY,5246
|
|
282
282
|
castor_extractor/warehouse/databricks/credentials.py,sha256=PpGv5_GP320UQjV_gvaxSpOw58AmqSznmjGhGfe6bdU,655
|
|
283
283
|
castor_extractor/warehouse/databricks/extract.py,sha256=VX-3uo5dZucenrg-wnPur3CxOgpC5H7Ds92TO7OTAjc,7379
|
|
284
|
-
castor_extractor/warehouse/databricks/format.py,sha256=
|
|
285
|
-
castor_extractor/warehouse/databricks/format_test.py,sha256=
|
|
284
|
+
castor_extractor/warehouse/databricks/format.py,sha256=zSO3Cm-vpidzNA07W81I506u-ToQzkjXVwKDmS-tfiE,6088
|
|
285
|
+
castor_extractor/warehouse/databricks/format_test.py,sha256=HZvJjcB7sj7LF1kIxAeaf_KdD3XOKn9nfeQLRT39G3s,2804
|
|
286
286
|
castor_extractor/warehouse/databricks/test_constants.py,sha256=Hm96yq_ltVAKv7WYhYz637r4Cuj-1cCdyOuxMEe3J-Q,2246
|
|
287
287
|
castor_extractor/warehouse/databricks/types.py,sha256=hD6gC8oiT3QSWEvbtgUOGK_lLzzz36sEauB3lS_wxlE,218
|
|
288
288
|
castor_extractor/warehouse/mysql/__init__.py,sha256=2KFDogo9GNbApHqw3Vm5t_uNmIRjdp76nmP_WQQMfQY,116
|
|
@@ -370,8 +370,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
|
|
|
370
370
|
castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
|
|
371
371
|
castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
|
|
372
372
|
castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
|
|
373
|
-
castor_extractor-0.17.
|
|
374
|
-
castor_extractor-0.17.
|
|
375
|
-
castor_extractor-0.17.
|
|
376
|
-
castor_extractor-0.17.
|
|
377
|
-
castor_extractor-0.17.
|
|
373
|
+
castor_extractor-0.17.2.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
374
|
+
castor_extractor-0.17.2.dist-info/METADATA,sha256=V-1vK-HPqqZxUKCRncvmp__Yg4UA3B1Aza7Ac4uVFrA,6582
|
|
375
|
+
castor_extractor-0.17.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
376
|
+
castor_extractor-0.17.2.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
|
|
377
|
+
castor_extractor-0.17.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|