castor-extractor 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CHANGELOG.md +4 -0
- castor_extractor/warehouse/databricks/__init__.py +1 -1
- castor_extractor/warehouse/databricks/client.py +94 -4
- castor_extractor/warehouse/databricks/credentials.py +9 -10
- castor_extractor/warehouse/databricks/extract.py +2 -2
- castor_extractor/warehouse/databricks/format.py +34 -8
- castor_extractor/warehouse/databricks/format_test.py +44 -19
- castor_extractor/warehouse/databricks/utils.py +27 -0
- castor_extractor/warehouse/databricks/utils_test.py +25 -0
- {castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/METADATA +6 -1
- {castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/RECORD +14 -12
- {castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/LICENCE +0 -0
- {castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/WHEEL +0 -0
- {castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED

castor_extractor/warehouse/databricks/client.py
CHANGED

@@ -1,10 +1,13 @@
 import logging
+from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor
 from datetime import date
+from enum import Enum
 from functools import partial
 from typing import Any, Dict, List, Optional, Set, Tuple, cast
 
 import requests
+from databricks import sql  # type: ignore
 from requests import Response
 
 from ...utils import (
@@ -19,8 +22,9 @@ from ...utils.client.api import APIClient
 from ...utils.pager import PagerOnToken
 from ..abstract.time_filter import TimeFilter
 from .credentials import DatabricksCredentials
-from .format import DatabricksFormatter
+from .format import DatabricksFormatter, TagMapping
 from .types import Link, Ostr, OTimestampedLink, TablesColumns, TimestampedLink
+from .utils import build_path, tag_label
 
 logger = logging.getLogger(__name__)
 
@@ -33,9 +37,18 @@ _RETRY_EXCEPTIONS = [
|
|
|
33
37
|
]
|
|
34
38
|
_WORKSPACE_ID_HEADER = "X-Databricks-Org-Id"
|
|
35
39
|
|
|
40
|
+
_INFORMATION_SCHEMA_SQL = "SELECT * FROM system.information_schema"
|
|
41
|
+
|
|
36
42
|
safe_params = SafeMode((BaseException,), _MAX_NUMBER_OF_LINEAGE_ERRORS)
|
|
37
43
|
|
|
38
44
|
|
|
45
|
+
class TagEntity(Enum):
|
|
46
|
+
"""Entities that can be tagged in Databricks"""
|
|
47
|
+
|
|
48
|
+
COLUMN = "COLUMN"
|
|
49
|
+
TABLE = "TABLE"
|
|
50
|
+
|
|
51
|
+
|
|
39
52
|
def _day_to_epoch_ms(day: date) -> int:
|
|
40
53
|
return int(at_midnight(day).timestamp() * 1000)
|
|
41
54
|
|
|
@@ -76,12 +89,38 @@ class DatabricksClient(APIClient):
         credentials: DatabricksCredentials,
         db_allowed: Optional[Set[str]] = None,
         db_blocked: Optional[Set[str]] = None,
+        has_table_tags: bool = False,
+        has_column_tags: bool = False,
     ):
         super().__init__(host=credentials.host, token=credentials.token)
+        self._http_path = credentials.http_path
         self._db_allowed = db_allowed
         self._db_blocked = db_blocked
+        self._has_table_tags = has_table_tags
+        self._has_column_tags = has_column_tags
         self.formatter = DatabricksFormatter()
 
+    def execute_sql(
+        self,
+        query: str,
+        params: Optional[dict] = None,
+    ):
+        """
+        Execute a SQL query on Databricks system tables and return the results.
+        https://docs.databricks.com/en/dev-tools/python-sql-connector.html
+
+        /!\ credentials.http_path is required in order to run SQL queries
+        """
+        assert self._http_path, "HTTP_PATH is required to run SQL queries"
+        with sql.connect(
+            server_hostname=self._host,
+            http_path=self._http_path,
+            access_token=self._token,
+        ) as connection:
+            with connection.cursor() as cursor:
+                cursor.execute(query, params)
+                return cursor.fetchall()
+
     @staticmethod
     def name() -> str:
         return "Databricks"
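
The new `execute_sql` method is a thin wrapper around the `databricks-sql-connector` package. Outside the extractor, the same round trip looks roughly like the sketch below; the hostname, warehouse `http_path`, and token are placeholders, and the query targets the same `system.information_schema` tables the client now reads:

```python
from databricks import sql  # pip install databricks-sql-connector

# Placeholder connection details -- substitute your own workspace values.
with sql.connect(
    server_hostname="dbc-example.cloud.databricks.com",
    http_path="/sql/1.0/warehouses/0123456789abcdef",
    access_token="dapi-example-token",
) as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT * FROM system.information_schema.table_tags")
        rows = cursor.fetchall()

for row in rows:
    print(row.asDict())  # each Row converts to a plain dict
```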
@@ -130,7 +169,12 @@ class DatabricksClient(APIClient):
         """
         return response.json(), response.headers[_WORKSPACE_ID_HEADER]
 
-    def _tables_columns_of_schema(
+    def _tables_columns_of_schema(
+        self,
+        schema: dict,
+        table_tags: TagMapping,
+        column_tags: TagMapping,
+    ) -> TablesColumns:
         path = "api/2.1/unity-catalog/tables"
         payload = {
             "catalog_name": schema["database_id"],
@@ -143,7 +187,12 @@ class DatabricksClient(APIClient):
         )
         host = self.build_url(self._host, path="")
         return self.formatter.format_table_column(
-            content.get("tables", []),
+            raw_tables=content.get("tables", []),
+            schema=schema,
+            host=host,
+            workspace_id=workspace_id,
+            table_tags=table_tags,
+            column_tags=column_tags,
         )
 
     @staticmethod
@@ -156,6 +205,40 @@ class DatabricksClient(APIClient):
             return table
         return {**table, "owner_external_id": owner_external_id}
 
+    def _needs_extraction(self, entity: TagEntity) -> bool:
+        if entity == TagEntity.TABLE:
+            return self._has_table_tags
+        if entity == TagEntity.COLUMN:
+            return self._has_column_tags
+        raise AssertionError(f"Entity not supported: {entity}")
+
+    def _get_tags_mapping(self, entity: TagEntity) -> TagMapping:
+        """
+        Fetch tags of the given entity and build a mapping:
+        { path: list[tags] }
+
+        https://docs.databricks.com/en/sql/language-manual/information-schema/table_tags.html
+        https://docs.databricks.com/en/sql/language-manual/information-schema/column_tags.html
+        """
+        if not self._needs_extraction(entity):
+            # extracting tags requires additional credentials (http_path)
+            return dict()
+
+        table = f"{entity.value.lower()}_tags"
+        query = f"{_INFORMATION_SCHEMA_SQL}.{table}"
+        result = self.execute_sql(query)
+        mapping = defaultdict(list)
+        for row in result:
+            dict_row = row.asDict()
+            keys = ["catalog_name", "schema_name", "table_name"]
+            if entity == TagEntity.COLUMN:
+                keys.append("column_name")
+            path = build_path(dict_row, keys)
+            label = tag_label(dict_row)
+            mapping[path].append(label)
+
+        return mapping
+
     @staticmethod
     def _get_user_mapping(users: List[dict]) -> dict:
         return {
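
Given rows from `table_tags` or `column_tags`, `_get_tags_mapping` folds them into a `{path: [labels]}` dict keyed by the dotted asset path. A minimal sketch of that aggregation on invented rows (the sample values are not actual Databricks output):

```python
from collections import defaultdict

# Invented rows shaped like system.information_schema.table_tags.
rows = [
    {"catalog_name": "foo", "schema_name": "bar", "table_name": "baz",
     "tag_name": "pii", "tag_value": ""},
    {"catalog_name": "foo", "schema_name": "bar", "table_name": "baz",
     "tag_name": "domain", "tag_value": "sales"},
]

mapping = defaultdict(list)
for row in rows:
    # build_path equivalent: join the key columns with dots
    path = ".".join(row[k] for k in ("catalog_name", "schema_name", "table_name"))
    # tag_label equivalent: "name" alone, or "name:value" when a value is set
    label = row["tag_name"] if not row["tag_value"] else f"{row['tag_name']}:{row['tag_value']}"
    mapping[path].append(label)

assert dict(mapping) == {"foo.bar.baz": ["pii", "domain:sales"]}
```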
@@ -172,8 +255,15 @@ class DatabricksClient(APIClient):
         tables: List[dict] = []
         columns: List[dict] = []
         user_mapping = self._get_user_mapping(users)
+        table_tags = self._get_tags_mapping(TagEntity.TABLE)
+        column_tags = self._get_tags_mapping(TagEntity.COLUMN)
         for schema in schemas:
-
+
+            t_to_add, c_to_add = self._tables_columns_of_schema(
+                schema=schema,
+                table_tags=table_tags,
+                column_tags=column_tags,
+            )
             t_with_owner = [
                 self._match_table_with_user(table, user_mapping)
                 for table in t_to_add
castor_extractor/warehouse/databricks/credentials.py
CHANGED

@@ -1,11 +1,10 @@
 from dataclasses import field
+from typing import Optional
 
 from pydantic.dataclasses import dataclass
+from pydantic_settings import SettingsConfigDict
 
-
-
-_HOST = "CASTOR_DATABRICKS_HOST"
-_TOKEN = "CASTOR_DATABRICKS_TOKEN"  # noqa: S105
+DATABRICKS_ENV_PREFIX = "CASTOR_DATABRICKS_"
 
 
 @dataclass
@@ -19,10 +18,10 @@ class DatabricksCredentials:
 
     host: str
     token: str = field(metadata={"sensitive": True})
+    http_path: Optional[str] = field(default=None)
 
-
-
-
-
-
-        return DatabricksCredentials(host=host, token=token)
+    model_config = SettingsConfigDict(
+        env_prefix=DATABRICKS_ENV_PREFIX,
+        extra="ignore",
+        populate_by_name=True,
+    )
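
The credentials now follow the pydantic-settings convention: each field can be supplied by an environment variable carrying the `CASTOR_DATABRICKS_` prefix. The sketch below illustrates that convention with a standalone `BaseSettings` class rather than the package's actual dataclass wiring, and all values are placeholders:

```python
import os
from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class ExampleDatabricksSettings(BaseSettings):
    """Illustration of the env-prefix convention only, not the package's class."""

    model_config = SettingsConfigDict(
        env_prefix="CASTOR_DATABRICKS_",
        extra="ignore",
    )

    host: str
    token: str
    http_path: Optional[str] = None  # only needed for tag extraction via SQL


os.environ["CASTOR_DATABRICKS_HOST"] = "dbc-example.cloud.databricks.com"
os.environ["CASTOR_DATABRICKS_TOKEN"] = "dapi-example-token"
os.environ["CASTOR_DATABRICKS_HTTP_PATH"] = "/sql/1.0/warehouses/0123456789abcdef"

settings = ExampleDatabricksSettings()
assert settings.http_path is not None
```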
castor_extractor/warehouse/databricks/extract.py
CHANGED

@@ -15,7 +15,7 @@ from ..abstract import (
     common_args,
 )
 from .client import DatabricksClient
-from .credentials import
+from .credentials import DatabricksCredentials
 
 DATABRICKS_ASSETS: SupportedAssets = {
     WarehouseAssetGroup.ADDITIONAL_LINEAGE: ADDITIONAL_LINEAGE_ASSETS,
@@ -170,7 +170,7 @@ def extract_all(**kwargs) -> None:
     output_directory, skip_existing = common_args(kwargs)
 
     client = DatabricksClient(
-        credentials=
+        credentials=DatabricksCredentials(**kwargs),
         db_allowed=kwargs.get("db_allowed"),
         db_blocked=kwargs.get("db_blocked"),
     )
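
Because the dataclass config sets `extra="ignore"`, `DatabricksCredentials(**kwargs)` can presumably receive the full kwargs bag of `extract_all`, with unrelated options such as `db_allowed` dropped rather than raising. A hedged sketch with placeholder values:

```python
kwargs = {
    "host": "dbc-example.cloud.databricks.com",
    "token": "dapi-example-token",
    "http_path": "/sql/1.0/warehouses/0123456789abcdef",
    "db_allowed": {"analytics"},  # extractor option, expected to be ignored by the dataclass
}

credentials = DatabricksCredentials(**kwargs)  # extra keys assumed discarded (extra="ignore")
```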
castor_extractor/warehouse/databricks/format.py
CHANGED

@@ -1,8 +1,9 @@
 import logging
 from datetime import datetime
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 from .types import TablesColumns
+from .utils import build_path
 
 logger = logging.getLogger(__name__)
 
@@ -11,6 +12,8 @@ EXCLUDED_SCHEMAS = {"information_schema", "default"}
 
 TABLE_URL_TPL = "{host}explore/data/{catalog_name}/{schema_name}/{table_name}?o={workspace_id}"
 
+TagMapping = Dict[str, List[str]]
+
 
 def _to_datetime_or_none(time_ms: Optional[int]) -> Optional[datetime]:
     """return time in ms as datetime or None"""
@@ -24,6 +27,7 @@ def _table_payload(
     table: dict,
     host: str,
     workspace_id: str,
+    tags: TagMapping,
 ) -> dict:
     """
     Prepares the table payload. This also includes a source link which is built
@@ -36,26 +40,43 @@ def _table_payload(
         table_name=table["name"],
         workspace_id=workspace_id,
     )
+
+    keys = ["catalog_name", "schema_name", "name"]
+    path = build_path(table, keys)
+
     return {
         "description": table.get("comment"),
         "id": table["table_id"],
         "owner_email": table.get("owner"),
         "schema_id": f"{schema['id']}",
         "table_name": table["name"],
-        "tags": [],
+        "tags": tags.get(path, []),
         "type": table.get("table_type"),
         "url": url,
     }
 
 
-def
+def _column_path(table: dict, column: dict) -> str:
+    keys = ["catalog_name", "schema_name", "name"]
+    table_path = build_path(table, keys)
+    column_name = column["name"]
+    return f"{table_path}.{column_name}"
+
+
+def _column_payload(
+    table: dict,
+    column: dict,
+    tags: TagMapping,
+) -> dict:
+    path = _column_path(table, column)
     return {
         "column_name": column["name"],
         "data_type": column["type_name"],
         "description": column.get("comment"),
-        "id": f"`{table['
+        "id": f"`{table['table_id']}`.`{column['name']}`",
         "ordinal_position": column["position"],
-        "table_id": table["
+        "table_id": table["table_id"],
+        "tags": tags.get(path, []),
     }
 
 
@@ -97,19 +118,24 @@ class DatabricksFormatter:
 
     @staticmethod
     def format_table_column(
-        raw_tables: List[dict],
+        raw_tables: List[dict],
+        schema: dict,
+        host: str,
+        workspace_id: str,
+        table_tags: TagMapping,
+        column_tags: TagMapping,
     ) -> TablesColumns:
         tables = []
         columns = []
         if not raw_tables:
             return [], []
         for table in raw_tables:
-            t = _table_payload(schema, table, host, workspace_id)
+            t = _table_payload(schema, table, host, workspace_id, table_tags)
             tables.append(t)
             if not table.get("columns"):
                 continue
             for column in table["columns"]:
-                c = _column_payload(
+                c = _column_payload(table, column, column_tags)
                 columns.append(c)
 
         return tables, columns
castor_extractor/warehouse/databricks/format_test.py
CHANGED

@@ -2,6 +2,7 @@ from datetime import datetime
 
 from .format import (
     DatabricksFormatter,
+    _column_path,
     _column_payload,
     _table_payload,
     _to_datetime_or_none,
@@ -42,7 +43,12 @@ def test__table_payload():
     host = "https://some.cloud.databricks.net/"
     workspace_id = "123456"
 
-
+    tags = {
+        "foo.bar.baz": ["riri", "fifi"],
+        "dummy.path": ["loulou"],
+    }
+
+    payload = _table_payload(schema, table, host, workspace_id, tags)
 
     expected = {
         "description": None,
@@ -50,7 +56,7 @@ def test__table_payload():
         "owner_email": "pot@ato.com",
         "schema_id": "id123",
         "table_name": "baz",
-        "tags": [],
+        "tags": ["riri", "fifi"],
         "type": "MANAGED",
         "url": "https://some.cloud.databricks.net/explore/data/foo/bar/baz?o=123456",
     }
@@ -59,40 +65,59 @@ def test__table_payload():
 
 def test__column_payload():
     table = {
-        "
-        "
-        "
-        "
-        "
-        "
+        "catalog_name": "foo",
+        "name": "baz",
+        "owner": "pot@ato.com",
+        "schema_name": "bar",
+        "table_id": "732pot5e-8ato-4c27-b701-9fa51febc192",
+        "table_type": "MANAGED",
     }
     column = {
+        "comment": "some description",
         "name": "Uid",
-        "
-        "type_name": "STRING",
+        "nullable": True,
         "position": 0,
+        "type_json": '{"name":"Uid","type":"string","nullable":true,"metadata":{}}',
+        "type_name": "STRING",
         "type_precision": 0,
         "type_scale": 0,
-        "
-        "nullable": True,
-        "comment": "some description",
+        "type_text": "string",
     }
-
+    tags = {
+        "foo.bar.baz.Uid": ["riri", "fifi"],
+        "dummy.path": ["loulou"],
+    }
+    payload = _column_payload(table, column, tags)
 
     expected = {
-        "id": "`18175cd5-9b9b-4d78-9d28-caaa12c21ce0`.`Uid`",
         "column_name": "Uid",
-        "table_id": "18175cd5-9b9b-4d78-9d28-caaa12c21ce0",
-        "description": "some description",
         "data_type": "STRING",
+        "description": "some description",
+        "id": "`732pot5e-8ato-4c27-b701-9fa51febc192`.`Uid`",
         "ordinal_position": 0,
+        "table_id": "732pot5e-8ato-4c27-b701-9fa51febc192",
+        "tags": ["riri", "fifi"],
     }
     assert payload == expected
 
     # case where there are spaces in the name
     column["name"] = "column name with spaces"
-    payload = _column_payload(table, column)
+    payload = _column_payload(table, column, tags)
     expected_id = (
-        "`
+        "`732pot5e-8ato-4c27-b701-9fa51febc192`.`column name with spaces`"
     )
     assert payload["id"] == expected_id
+
+
+def test__column_path():
+    table = {
+        "catalog_name": "Jo",
+        "schema_name": "William",
+        "name": "Jack",
+    }
+    column = {
+        "name": "Averell",
+    }
+
+    expected = "Jo.William.Jack.Averell"
+    assert _column_path(table=table, column=column) == expected
castor_extractor/warehouse/databricks/utils.py
ADDED

@@ -0,0 +1,27 @@
+from typing import Dict, List
+
+
+def build_path(
+    row: Dict,
+    keys: List[str],
+) -> str:
+    """
+    Format an asset's path:
+    - picks the given keys from the dict
+    - joins the values with a dot "."
+    """
+    key_values = [row[key] for key in keys]
+    return ".".join(key_values)
+
+
+def tag_label(row: Dict) -> str:
+    """
+    Format the tag's label:
+    - {key}:{value} when the value is not empty
+    - {key} otherwise
+    """
+    tag_name = row["tag_name"]
+    tag_value = row["tag_value"]
+    if not tag_value:
+        return tag_name
+    return f"{tag_name}:{tag_value}"
castor_extractor/warehouse/databricks/utils_test.py
ADDED

@@ -0,0 +1,25 @@
+from .utils import build_path, tag_label
+
+
+def test_build_path():
+    row = {
+        "bigflo": "oli",
+        "laurel": "hardy",
+        "dupond": "dupont",
+    }
+    keys = ["laurel", "dupond"]
+    assert build_path(row, keys) == "hardy.dupont"
+
+
+def test_tag_label():
+    row = {
+        "tag_name": "marketplace",
+        "tag_value": "",
+    }
+    assert tag_label(row) == "marketplace"
+
+    row = {
+        "tag_name": "fi",
+        "tag_value": "fou",
+    }
+    assert tag_label(row) == "fi:fou"
{castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.17.2
+Version: 0.17.3
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Provides-Extra: all
 Provides-Extra: bigquery
+Provides-Extra: databricks
 Provides-Extra: dbt
 Provides-Extra: looker
 Provides-Extra: metabase
@@ -29,6 +30,7 @@ Provides-Extra: snowflake
 Provides-Extra: sqlserver
 Provides-Extra: tableau
 Requires-Dist: cryptography (>=41.0.5) ; extra == "snowflake"
+Requires-Dist: databricks-sql-connector (>=3.2.0,<4.0.0) ; extra == "databricks" or extra == "all"
 Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
 Requires-Dist: google-auth (>=2,<3)
 Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
@@ -39,6 +41,9 @@ Requires-Dist: looker-sdk (>=23.0.0) ; extra == "looker" or extra == "all"
 Requires-Dist: msal (>=1.20.0,<2.0.0) ; extra == "powerbi" or extra == "all"
 Requires-Dist: numpy (<1.25) ; python_version >= "3.8" and python_version < "3.9"
 Requires-Dist: numpy (>=1.26,<2) ; python_version >= "3.12" and python_version < "3.13"
+Requires-Dist: pandas (>=2,<2.2.0) ; python_version >= "3.9" and python_full_version <= "3.11.0"
+Requires-Dist: pandas (>=2.0,<2.1) ; python_version >= "3.8" and python_version < "3.9"
+Requires-Dist: pandas (>=2.1,<2.2.0) ; python_version >= "3.12" and python_version < "3.13"
 Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0) ; extra == "metabase" or extra == "postgres" or extra == "redshift" or extra == "all"
 Requires-Dist: pycryptodome (>=3.0.0,<4.0.0) ; extra == "metabase" or extra == "all"
 Requires-Dist: pydantic (>=2.6,<3.0)
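
With the new `databricks` extra, the SQL connector is only pulled in on demand, e.g. `pip install 'castor-extractor[databricks]'` (or via the `all` extra).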
{castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=
+CHANGELOG.md,sha256=dxvJYXKwACP7txJdP-1Ug0G6tj34Vsd8TkEn1uuhLgs,11152
 Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
 README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
@@ -276,15 +276,17 @@ castor_extractor/warehouse/bigquery/queries/user.sql,sha256=l-fkNGWJVdZwVhbFZL23
 castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTwcgNSBDD0ZXPgRjlxJjrZDSEH2MU,326
 castor_extractor/warehouse/bigquery/query.py,sha256=hrFfjd5jW2oQnZ6ozlkn-gDe6sCIzu5zSX19T9W6fIk,4162
 castor_extractor/warehouse/bigquery/types.py,sha256=LZVWSmE57lOemNbB5hBRyYmDk9bFAU4nbRaJWALl6N8,140
-castor_extractor/warehouse/databricks/__init__.py,sha256=
-castor_extractor/warehouse/databricks/client.py,sha256=
+castor_extractor/warehouse/databricks/__init__.py,sha256=YG3YSIJgCFRjjI8eExy9T7qGnfnjWhMFh8c15KTs_BA,184
+castor_extractor/warehouse/databricks/client.py,sha256=pnYb6nl9U33nH6UukYP5piWGYF-m1SP2TYiWiUozM_4,20552
 castor_extractor/warehouse/databricks/client_test.py,sha256=KNp4Hi_CC6GwiW2QDJQQwqALfUebuT9D_qL6FuP_8tY,5246
-castor_extractor/warehouse/databricks/credentials.py,sha256=
-castor_extractor/warehouse/databricks/extract.py,sha256=
-castor_extractor/warehouse/databricks/format.py,sha256=
-castor_extractor/warehouse/databricks/format_test.py,sha256=
+castor_extractor/warehouse/databricks/credentials.py,sha256=iphbVynVTQXMEbJy4QaT5fer-GpOi7QtbAlg8R7-Lj4,598
+castor_extractor/warehouse/databricks/extract.py,sha256=VYygE06f7ngYWVlRa48O6drLIZF-_4IBJdyXTYfxZQU,7395
+castor_extractor/warehouse/databricks/format.py,sha256=p252NFzQN1uZdsu5wpP-bMHK0rBBVzallX3-o92Mvh4,6744
+castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0zdca7Pms4seCHimbw8NAoAI,3322
 castor_extractor/warehouse/databricks/test_constants.py,sha256=Hm96yq_ltVAKv7WYhYz637r4Cuj-1cCdyOuxMEe3J-Q,2246
 castor_extractor/warehouse/databricks/types.py,sha256=hD6gC8oiT3QSWEvbtgUOGK_lLzzz36sEauB3lS_wxlE,218
+castor_extractor/warehouse/databricks/utils.py,sha256=RWRViqLaj2K0in5T5F6OLp7HCm554BCh3zi4CJqOEt8,576
+castor_extractor/warehouse/databricks/utils_test.py,sha256=5Qrd_tLNLWrDHX2uQyVUf0vqXJzD44uQGGxDBOkwvUU,503
 castor_extractor/warehouse/mysql/__init__.py,sha256=2KFDogo9GNbApHqw3Vm5t_uNmIRjdp76nmP_WQQMfQY,116
 castor_extractor/warehouse/mysql/client.py,sha256=IwoJvbmE5VZkMCP9yHf6ta3_AQPEuBPrZZ3meefbcJs,974
 castor_extractor/warehouse/mysql/client_test.py,sha256=wRTv-3c5chy_HKj-buasNiYOOCIfynYqbabM4Hxdh5E,1052
@@ -370,8 +372,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
 castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
 castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
 castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
-castor_extractor-0.17.
-castor_extractor-0.17.
-castor_extractor-0.17.
-castor_extractor-0.17.
-castor_extractor-0.17.
+castor_extractor-0.17.3.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.17.3.dist-info/METADATA,sha256=v_xgS6DS7FC1kVzma9Z69XiMOjDn6BHckEJJ-rJ5TZI,6985
+castor_extractor-0.17.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+castor_extractor-0.17.3.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
+castor_extractor-0.17.3.dist-info/RECORD,,
{castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/LICENCE
File without changes

{castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/WHEEL
File without changes

{castor_extractor-0.17.2.dist-info → castor_extractor-0.17.3.dist-info}/entry_points.txt
File without changes