castor-extractor 0.17.0-py3-none-any.whl → 0.17.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of castor-extractor might be problematic.
CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
  # Changelog

+ ## 0.17.3 - 2024-06-24
+
+ * Databricks: extract tags for tables and column
+
+ ## 0.17.2 - 2024-06-14
+
+ * Uploader: support multipart
+
+ ## 0.17.1 - 2024-06-12
+
+ * Databricks: extract table source links
+
  ## 0.17.0 - 2024-06-10

  * Uploader: redirect to the proxy, replace credentials with token

castor_extractor/uploader/upload.py CHANGED
@@ -83,7 +83,7 @@ def _upload(
  response = requests.post(
  url=url,
  headers=headers,
- data=file_content,
+ files={"file": file_content},
  timeout=timeout,
  )
  response.raise_for_status()
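
The change above switches the uploader from sending the file bytes as the raw request body to a multipart/form-data upload. A minimal sketch of the difference with the `requests` library (endpoint, headers and payload below are placeholders, not the actual Castor values):

```python
import requests

url = "https://example.invalid/upload"          # placeholder endpoint
headers = {"Authorization": "Bearer <token>"}   # placeholder header
file_content = b"col_a,col_b\n1,2\n"            # placeholder payload

# 0.17.0 behaviour: bytes sent as the raw request body
requests.post(url=url, headers=headers, data=file_content, timeout=60)

# 0.17.2+ behaviour: bytes sent as a multipart part named "file"
requests.post(url=url, headers=headers, files={"file": file_content}, timeout=60)
```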

castor_extractor/utils/client/api.py CHANGED
@@ -58,7 +58,12 @@ class APIClient:

  return result.json()

- def get(self, path: str, payload: Optional[dict] = None) -> dict:
+ def get(
+ self,
+ path: str,
+ payload: Optional[dict] = None,
+ processor: Optional[Callable] = None,
+ ) -> dict:
  """path: REST API operation path, such as /api/2.0/clusters/get"""
  url = self.build_url(self._host, path)
- return self._call(url=url, data=payload)
+ return self._call(url=url, data=payload, processor=processor)
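
The new optional `processor` argument lets a caller transform the raw HTTP response instead of receiving only the parsed JSON body. A hedged sketch of the pattern, mirroring the `_process_table_response` helper added to the Databricks client further down (assumes `client` is an already-configured `APIClient` subclass):

```python
def process(response):
    # keep the JSON body and also surface a value carried in the response headers
    return response.json(), response.headers["X-Databricks-Org-Id"]

content, workspace_id = client.get(
    path="api/2.1/unity-catalog/tables",
    payload={"catalog_name": "main", "schema_name": "default"},  # illustrative values
    processor=process,
)
```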

castor_extractor/warehouse/databricks/__init__.py CHANGED
@@ -1,5 +1,5 @@
  from .client import DatabricksClient
- from .credentials import DatabricksCredentials, to_credentials
+ from .credentials import DatabricksCredentials
  from .extract import (
  DATABRICKS_ASSETS,
  DatabricksExtractionProcessor,

castor_extractor/warehouse/databricks/client.py CHANGED
@@ -1,10 +1,14 @@
  import logging
+ from collections import defaultdict
  from concurrent.futures import ThreadPoolExecutor
  from datetime import date
+ from enum import Enum
  from functools import partial
  from typing import Any, Dict, List, Optional, Set, Tuple, cast

  import requests
+ from databricks import sql # type: ignore
+ from requests import Response

  from ...utils import (
  SafeMode,
@@ -18,8 +22,9 @@ from ...utils.client.api import APIClient
  from ...utils.pager import PagerOnToken
  from ..abstract.time_filter import TimeFilter
  from .credentials import DatabricksCredentials
- from .format import DatabricksFormatter
+ from .format import DatabricksFormatter, TagMapping
  from .types import Link, Ostr, OTimestampedLink, TablesColumns, TimestampedLink
+ from .utils import build_path, tag_label

  logger = logging.getLogger(__name__)

@@ -30,10 +35,20 @@ _RETRY_BASE_MS = 1000
  _RETRY_EXCEPTIONS = [
  requests.exceptions.ConnectTimeout,
  ]
+ _WORKSPACE_ID_HEADER = "X-Databricks-Org-Id"
+
+ _INFORMATION_SCHEMA_SQL = "SELECT * FROM system.information_schema"

  safe_params = SafeMode((BaseException,), _MAX_NUMBER_OF_LINEAGE_ERRORS)


+ class TagEntity(Enum):
+ """Entities that can be tagged in Databricks"""
+
+ COLUMN = "COLUMN"
+ TABLE = "TABLE"
+
+
  def _day_to_epoch_ms(day: date) -> int:
  return int(at_midnight(day).timestamp() * 1000)

@@ -74,12 +89,38 @@ class DatabricksClient(APIClient):
  credentials: DatabricksCredentials,
  db_allowed: Optional[Set[str]] = None,
  db_blocked: Optional[Set[str]] = None,
+ has_table_tags: bool = False,
+ has_column_tags: bool = False,
  ):
  super().__init__(host=credentials.host, token=credentials.token)
+ self._http_path = credentials.http_path
  self._db_allowed = db_allowed
  self._db_blocked = db_blocked
+ self._has_table_tags = has_table_tags
+ self._has_column_tags = has_column_tags
  self.formatter = DatabricksFormatter()

+ def execute_sql(
+ self,
+ query: str,
+ params: Optional[dict] = None,
+ ):
+ """
+ Execute a SQL query on Databricks system tables and return the results.
+ https://docs.databricks.com/en/dev-tools/python-sql-connector.html
+
+ /!\ credentials.http_path is required in order to run SQL queries
+ """
+ assert self._http_path, "HTTP_PATH is required to run SQL queries"
+ with sql.connect(
+ server_hostname=self._host,
+ http_path=self._http_path,
+ access_token=self._token,
+ ) as connection:
+ with connection.cursor() as cursor:
+ cursor.execute(query, params)
+ return cursor.fetchall()
+
  @staticmethod
  def name() -> str:
  return "Databricks"
@@ -120,15 +161,38 @@ class DatabricksClient(APIClient):
  for schema in self._schemas_of_database(database)
  ]

- def _tables_columns_of_schema(self, schema: dict) -> TablesColumns:
+ @staticmethod
+ def _process_table_response(response: Response) -> Tuple[dict, str]:
+ """
+ Returns both the JSON content and the Workspace ID, which is found
+ in the response's headers.
+ """
+ return response.json(), response.headers[_WORKSPACE_ID_HEADER]
+
+ def _tables_columns_of_schema(
+ self,
+ schema: dict,
+ table_tags: TagMapping,
+ column_tags: TagMapping,
+ ) -> TablesColumns:
  path = "api/2.1/unity-catalog/tables"
  payload = {
  "catalog_name": schema["database_id"],
  "schema_name": schema["schema_name"],
  }
- content = self.get(path=path, payload=payload)
+ content, workspace_id = self.get(
+ path=path,
+ payload=payload,
+ processor=self._process_table_response,
+ )
+ host = self.build_url(self._host, path="")
  return self.formatter.format_table_column(
- content.get("tables", []), schema
+ raw_tables=content.get("tables", []),
+ schema=schema,
+ host=host,
+ workspace_id=workspace_id,
+ table_tags=table_tags,
+ column_tags=column_tags,
  )

  @staticmethod
@@ -141,6 +205,40 @@ class DatabricksClient(APIClient):
  return table
  return {**table, "owner_external_id": owner_external_id}

+ def _needs_extraction(self, entity: TagEntity) -> bool:
+ if entity == TagEntity.TABLE:
+ return self._has_table_tags
+ if entity == TagEntity.COLUMN:
+ return self._has_column_tags
+ raise AssertionError(f"Entity not supported: {entity}")
+
+ def _get_tags_mapping(self, entity: TagEntity) -> TagMapping:
+ """
+ Fetch tags of the given entity and build a mapping:
+ { path: list[tags] }
+
+ https://docs.databricks.com/en/sql/language-manual/information-schema/table_tags.html
+ https://docs.databricks.com/en/sql/language-manual/information-schema/column_tags.html
+ """
+ if not self._needs_extraction(entity):
+ # extracting tags require additional credentials (http_path)
+ return dict()
+
+ table = f"{entity.value.lower()}_tags"
+ query = f"{_INFORMATION_SCHEMA_SQL}.{table}"
+ result = self.execute_sql(query)
+ mapping = defaultdict(list)
+ for row in result:
+ dict_row = row.asDict()
+ keys = ["catalog_name", "schema_name", "table_name"]
+ if entity == TagEntity.COLUMN:
+ keys.append("column_name")
+ path = build_path(dict_row, keys)
+ label = tag_label(dict_row)
+ mapping[path].append(label)
+
+ return mapping
+
  @staticmethod
  def _get_user_mapping(users: List[dict]) -> dict:
  return {
@@ -157,8 +255,15 @@ class DatabricksClient(APIClient):
  tables: List[dict] = []
  columns: List[dict] = []
  user_mapping = self._get_user_mapping(users)
+ table_tags = self._get_tags_mapping(TagEntity.TABLE)
+ column_tags = self._get_tags_mapping(TagEntity.COLUMN)
  for schema in schemas:
- t_to_add, c_to_add = self._tables_columns_of_schema(schema)
+
+ t_to_add, c_to_add = self._tables_columns_of_schema(
+ schema=schema,
+ table_tags=table_tags,
+ column_tags=column_tags,
+ )
  t_with_owner = [
  self._match_table_with_user(table, user_mapping)
  for table in t_to_add
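
Taken together, the client changes make tag extraction opt-in: `_get_tags_mapping` only queries the Unity Catalog information schema when `has_table_tags` / `has_column_tags` are set, and `execute_sql` additionally requires the new `http_path` credential (see the credentials diff just below). A rough sketch of the flow, with placeholder connection values:

```python
from castor_extractor.warehouse.databricks import DatabricksClient, DatabricksCredentials

credentials = DatabricksCredentials(
    host="adb-1234567890123456.7.azuredatabricks.net",   # placeholder workspace host
    token="dapiXXXXXXXX",                                # placeholder personal access token
    http_path="/sql/1.0/warehouses/abcdef1234567890",    # placeholder SQL warehouse path
)
client = DatabricksClient(
    credentials=credentials,
    has_table_tags=True,
    has_column_tags=True,
)

# roughly what _get_tags_mapping(TagEntity.TABLE) runs under the hood:
rows = client.execute_sql("SELECT * FROM system.information_schema.table_tags")
```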

castor_extractor/warehouse/databricks/credentials.py CHANGED
@@ -1,11 +1,10 @@
  from dataclasses import field
+ from typing import Optional

  from pydantic.dataclasses import dataclass
+ from pydantic_settings import SettingsConfigDict

- from ...utils import from_env
-
- _HOST = "CASTOR_DATABRICKS_HOST"
- _TOKEN = "CASTOR_DATABRICKS_TOKEN" # noqa: S105
+ DATABRICKS_ENV_PREFIX = "CASTOR_DATABRICKS_"


  @dataclass
@@ -19,10 +18,10 @@ class DatabricksCredentials:

  host: str
  token: str = field(metadata={"sensitive": True})
+ http_path: Optional[str] = field(default=None)

-
- def to_credentials(params: dict) -> DatabricksCredentials:
- """extract Databricks credentials"""
- host = params.get("host") or from_env(_HOST)
- token = params.get("token") or from_env(_TOKEN)
- return DatabricksCredentials(host=host, token=token)
+ model_config = SettingsConfigDict(
+ env_prefix=DATABRICKS_ENV_PREFIX,
+ extra="ignore",
+ populate_by_name=True,
+ )

castor_extractor/warehouse/databricks/extract.py CHANGED
@@ -15,7 +15,7 @@ from ..abstract import (
  common_args,
  )
  from .client import DatabricksClient
- from .credentials import to_credentials
+ from .credentials import DatabricksCredentials

  DATABRICKS_ASSETS: SupportedAssets = {
  WarehouseAssetGroup.ADDITIONAL_LINEAGE: ADDITIONAL_LINEAGE_ASSETS,
@@ -170,7 +170,7 @@ def extract_all(**kwargs) -> None:
  output_directory, skip_existing = common_args(kwargs)

  client = DatabricksClient(
- credentials=to_credentials(kwargs),
+ credentials=DatabricksCredentials(**kwargs),
  db_allowed=kwargs.get("db_allowed"),
  db_blocked=kwargs.get("db_blocked"),
  )

castor_extractor/warehouse/databricks/format.py CHANGED
@@ -1,14 +1,19 @@
  import logging
  from datetime import datetime
- from typing import List, Optional
+ from typing import Dict, List, Optional

  from .types import TablesColumns
+ from .utils import build_path

  logger = logging.getLogger(__name__)

  EXCLUDED_DATABASES = {"system"}
  EXCLUDED_SCHEMAS = {"information_schema", "default"}

+ TABLE_URL_TPL = "{host}explore/data/{catalog_name}/{schema_name}/{table_name}?o={workspace_id}"
+
+ TagMapping = Dict[str, List[str]]
+

  def _to_datetime_or_none(time_ms: Optional[int]) -> Optional[datetime]:
  """return time in ms as datetime or None"""
@@ -17,26 +22,61 @@ def _to_datetime_or_none(time_ms: Optional[int]) -> Optional[datetime]:
  return datetime.fromtimestamp(time_ms / 1000.0)


- def _table_payload(schema: dict, table: dict) -> dict:
+ def _table_payload(
+ schema: dict,
+ table: dict,
+ host: str,
+ workspace_id: str,
+ tags: TagMapping,
+ ) -> dict:
+ """
+ Prepares the table payload. This also includes a source link which is built
+ here using the host and workspace_id.
+ """
+ url = TABLE_URL_TPL.format(
+ host=host,
+ catalog_name=table["catalog_name"],
+ schema_name=table["schema_name"],
+ table_name=table["name"],
+ workspace_id=workspace_id,
+ )
+
+ keys = ["catalog_name", "schema_name", "name"]
+ path = build_path(table, keys)
+
  return {
  "description": table.get("comment"),
  "id": table["table_id"],
  "owner_email": table.get("owner"),
  "schema_id": f"{schema['id']}",
  "table_name": table["name"],
- "tags": [],
+ "tags": tags.get(path, []),
  "type": table.get("table_type"),
+ "url": url,
  }


- def _column_payload(table: dict, column: dict) -> dict:
+ def _column_path(table: dict, column: dict) -> str:
+ keys = ["catalog_name", "schema_name", "name"]
+ table_path = build_path(table, keys)
+ column_name = column["name"]
+ return f"{table_path}.{column_name}"
+
+
+ def _column_payload(
+ table: dict,
+ column: dict,
+ tags: TagMapping,
+ ) -> dict:
+ path = _column_path(table, column)
  return {
  "column_name": column["name"],
  "data_type": column["type_name"],
  "description": column.get("comment"),
- "id": f"`{table['id']}`.`{column['name']}`",
+ "id": f"`{table['table_id']}`.`{column['name']}`",
  "ordinal_position": column["position"],
- "table_id": table["id"],
+ "table_id": table["table_id"],
+ "tags": tags.get(path, []),
  }


@@ -78,19 +118,24 @@ class DatabricksFormatter:

  @staticmethod
  def format_table_column(
- raw_tables: List[dict], schema: dict
+ raw_tables: List[dict],
+ schema: dict,
+ host: str,
+ workspace_id: str,
+ table_tags: TagMapping,
+ column_tags: TagMapping,
  ) -> TablesColumns:
  tables = []
  columns = []
  if not raw_tables:
  return [], []
  for table in raw_tables:
- t = _table_payload(schema, table)
+ t = _table_payload(schema, table, host, workspace_id, table_tags)
  tables.append(t)
  if not table.get("columns"):
  continue
  for column in table["columns"]:
- c = _column_payload(t, column)
+ c = _column_payload(table, column, column_tags)
  columns.append(c)

  return tables, columns

castor_extractor/warehouse/databricks/format_test.py CHANGED
@@ -1,6 +1,12 @@
  from datetime import datetime

- from .format import DatabricksFormatter, _column_payload, _to_datetime_or_none
+ from .format import (
+ DatabricksFormatter,
+ _column_path,
+ _column_payload,
+ _table_payload,
+ _to_datetime_or_none,
+ )


  def test__to_datetime_or_none():
@@ -23,42 +29,95 @@ def test_DatabricksFormatter__primary():
  assert DatabricksFormatter._primary([]) is None


+ def test__table_payload():
+ schema = {"id": "id123"}
+
+ table = {
+ "name": "baz",
+ "catalog_name": "foo",
+ "schema_name": "bar",
+ "table_type": "MANAGED",
+ "owner": "pot@ato.com",
+ "table_id": "732pot5e-8ato-4c27-b701-9fa51febc192",
+ }
+ host = "https://some.cloud.databricks.net/"
+ workspace_id = "123456"
+
+ tags = {
+ "foo.bar.baz": ["riri", "fifi"],
+ "dummy.path": ["loulou"],
+ }
+
+ payload = _table_payload(schema, table, host, workspace_id, tags)
+
+ expected = {
+ "description": None,
+ "id": "732pot5e-8ato-4c27-b701-9fa51febc192",
+ "owner_email": "pot@ato.com",
+ "schema_id": "id123",
+ "table_name": "baz",
+ "tags": ["riri", "fifi"],
+ "type": "MANAGED",
+ "url": "https://some.cloud.databricks.net/explore/data/foo/bar/baz?o=123456",
+ }
+ assert payload == expected
+
+
  def test__column_payload():
  table = {
- "id": "18175cd5-9b9b-4d78-9d28-caaa12c21ce0",
- "schema_id": "dv_microservices.company_silver",
- "table_name": "companyrepository_organization_v1",
- "description": "some description",
- "tags": [],
- "type": "TABLE",
+ "catalog_name": "foo",
+ "name": "baz",
+ "owner": "pot@ato.com",
+ "schema_name": "bar",
+ "table_id": "732pot5e-8ato-4c27-b701-9fa51febc192",
+ "table_type": "MANAGED",
  }
  column = {
+ "comment": "some description",
  "name": "Uid",
- "type_text": "string",
- "type_name": "STRING",
+ "nullable": True,
  "position": 0,
+ "type_json": '{"name":"Uid","type":"string","nullable":true,"metadata":{}}',
+ "type_name": "STRING",
  "type_precision": 0,
  "type_scale": 0,
- "type_json": '{"name":"Uid","type":"string","nullable":true,"metadata":{}}',
- "nullable": True,
- "comment": "some description",
+ "type_text": "string",
  }
- payload = _column_payload(table, column)
+ tags = {
+ "foo.bar.baz.Uid": ["riri", "fifi"],
+ "dummy.path": ["loulou"],
+ }
+ payload = _column_payload(table, column, tags)

  expected = {
- "id": "`18175cd5-9b9b-4d78-9d28-caaa12c21ce0`.`Uid`",
  "column_name": "Uid",
- "table_id": "18175cd5-9b9b-4d78-9d28-caaa12c21ce0",
- "description": "some description",
  "data_type": "STRING",
+ "description": "some description",
+ "id": "`732pot5e-8ato-4c27-b701-9fa51febc192`.`Uid`",
  "ordinal_position": 0,
+ "table_id": "732pot5e-8ato-4c27-b701-9fa51febc192",
+ "tags": ["riri", "fifi"],
  }
  assert payload == expected

  # case where there are spaces in the name
  column["name"] = "column name with spaces"
- payload = _column_payload(table, column)
+ payload = _column_payload(table, column, tags)
  expected_id = (
- "`18175cd5-9b9b-4d78-9d28-caaa12c21ce0`.`column name with spaces`"
+ "`732pot5e-8ato-4c27-b701-9fa51febc192`.`column name with spaces`"
  )
  assert payload["id"] == expected_id
+
+
+ def test__column_path():
+ table = {
+ "catalog_name": "Jo",
+ "schema_name": "William",
+ "name": "Jack",
+ }
+ column = {
+ "name": "Averell",
+ }
+
+ expected = "Jo.William.Jack.Averell"
+ assert _column_path(table=table, column=column) == expected

castor_extractor/warehouse/databricks/utils.py ADDED
@@ -0,0 +1,27 @@
+ from typing import Dict, List
+
+
+ def build_path(
+ row: Dict,
+ keys: List[str],
+ ) -> str:
+ """
+ format an asset's path:
+ - picks the given keys from dict
+ - join keys with a dot "."
+ """
+ key_values = [row[key] for key in keys]
+ return ".".join(key_values)
+
+
+ def tag_label(row: Dict) -> str:
+ """
+ format the tag's label:
+ - {key:value} when the value is not empty
+ - {key} otherwise
+ """
+ tag_name = row["tag_name"]
+ tag_value = row["tag_value"]
+ if not tag_value:
+ return tag_name
+ return f"{tag_name}:{tag_value}"

castor_extractor/warehouse/databricks/utils_test.py ADDED
@@ -0,0 +1,25 @@
+ from .utils import build_path, tag_label
+
+
+ def test_build_path():
+ row = {
+ "bigflo": "oli",
+ "laurel": "hardy",
+ "dupond": "dupont",
+ }
+ keys = ["laurel", "dupond"]
+ assert build_path(row, keys) == "hardy.dupont"
+
+
+ def test_tag_label():
+ row = {
+ "tag_name": "marketplace",
+ "tag_value": "",
+ }
+ assert tag_label(row) == "marketplace"
+
+ row = {
+ "tag_name": "fi",
+ "tag_value": "fou",
+ }
+ assert tag_label(row) == "fi:fou"

castor_extractor-0.17.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: castor-extractor
- Version: 0.17.0
+ Version: 0.17.3
  Summary: Extract your metadata assets.
  Home-page: https://www.castordoc.com/
  License: EULA
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Provides-Extra: all
  Provides-Extra: bigquery
+ Provides-Extra: databricks
  Provides-Extra: dbt
  Provides-Extra: looker
  Provides-Extra: metabase
@@ -29,6 +30,7 @@ Provides-Extra: snowflake
  Provides-Extra: sqlserver
  Provides-Extra: tableau
  Requires-Dist: cryptography (>=41.0.5) ; extra == "snowflake"
+ Requires-Dist: databricks-sql-connector (>=3.2.0,<4.0.0) ; extra == "databricks" or extra == "all"
  Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
  Requires-Dist: google-auth (>=2,<3)
  Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
@@ -39,6 +41,9 @@ Requires-Dist: looker-sdk (>=23.0.0) ; extra == "looker" or extra == "all"
  Requires-Dist: msal (>=1.20.0,<2.0.0) ; extra == "powerbi" or extra == "all"
  Requires-Dist: numpy (<1.25) ; python_version >= "3.8" and python_version < "3.9"
  Requires-Dist: numpy (>=1.26,<2) ; python_version >= "3.12" and python_version < "3.13"
+ Requires-Dist: pandas (>=2,<2.2.0) ; python_version >= "3.9" and python_full_version <= "3.11.0"
+ Requires-Dist: pandas (>=2.0,<2.1) ; python_version >= "3.8" and python_version < "3.9"
+ Requires-Dist: pandas (>=2.1,<2.2.0) ; python_version >= "3.12" and python_version < "3.13"
  Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0) ; extra == "metabase" or extra == "postgres" or extra == "redshift" or extra == "all"
  Requires-Dist: pycryptodome (>=3.0.0,<4.0.0) ; extra == "metabase" or extra == "all"
  Requires-Dist: pydantic (>=2.6,<3.0)
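
The new `databricks` extra above carries the SQL connector needed for the tag queries; assuming standard pip extras syntax, something like `pip install "castor-extractor[databricks]"` (or the `all` extra) would pull in `databricks-sql-connector (>=3.2.0,<4.0.0)`.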

castor_extractor-0.17.3.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
- CHANGELOG.md,sha256=EVZ9vhIVN7HLn5PYkRyBWyT3hk72Nt3i1SghwSipfR4,10957
+ CHANGELOG.md,sha256=dxvJYXKwACP7txJdP-1Ug0G6tj34Vsd8TkEn1uuhLgs,11152
  Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
  README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
@@ -41,13 +41,13 @@ castor_extractor/uploader/__init__.py,sha256=SSRtwjg-dNoxME-RJy9G1flASiUKAC5bH1h
  castor_extractor/uploader/constant.py,sha256=yTigLHDlYwoRr6CpFIl7ReElFsQd4H-qkluMZJPWSx0,865
  castor_extractor/uploader/env.py,sha256=5HSniVSOYVg4u38O4k8TB_qaJq9s8yJ1hjedkq_gdVg,878
  castor_extractor/uploader/env_test.py,sha256=ClCWWtwd2N-5ClIDUxVMeKkWfhhOTxpppsXUDmdjxSg,472
- castor_extractor/uploader/upload.py,sha256=bTWD1_-hmJ6q1qcEosjZ96wsBtWDnWoCt692NYX_Nko,3228
+ castor_extractor/uploader/upload.py,sha256=W1TGqO8_PtFdR661qNlh6v-LOIRvoJoda65-5OujFXs,3239
  castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
  castor_extractor/uploader/utils.py,sha256=Tx_i875L2vJ8btOLV3-L0UMEFiyhH8E5n0XXRyLjO0Y,793
  castor_extractor/utils/__init__.py,sha256=bmzAOc-PKsVreMJtF7DGpPQeHrVqxWel_BblRftt6Ag,1186
  castor_extractor/utils/client/__init__.py,sha256=CRE-xJKm6fVV9dB8ljzB5YoOxX4I1sCD1KSgqs3Y8_Y,161
  castor_extractor/utils/client/abstract.py,sha256=aA5Qcb9TwWDSMq8WpXbGkOB20hehwX2VTpqQAwV76wk,2048
- castor_extractor/utils/client/api.py,sha256=z1o4fteWx1HxNTqCYihl9sGkIgSQTbd8lW_B9Y2wyeQ,1742
+ castor_extractor/utils/client/api.py,sha256=AGDj2JH__Q_x7RQdodoVazGvjGQJ9TzNqs-XEX6Hrms,1840
  castor_extractor/utils/client/api_test.py,sha256=NSMdXg1FLc37erqHp2FZsIsogWVv6lFSs7rDXHikr-E,542
  castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9mMqL71nyygo0,866
  castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
@@ -276,15 +276,17 @@ castor_extractor/warehouse/bigquery/queries/user.sql,sha256=l-fkNGWJVdZwVhbFZL23
  castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTwcgNSBDD0ZXPgRjlxJjrZDSEH2MU,326
  castor_extractor/warehouse/bigquery/query.py,sha256=hrFfjd5jW2oQnZ6ozlkn-gDe6sCIzu5zSX19T9W6fIk,4162
  castor_extractor/warehouse/bigquery/types.py,sha256=LZVWSmE57lOemNbB5hBRyYmDk9bFAU4nbRaJWALl6N8,140
- castor_extractor/warehouse/databricks/__init__.py,sha256=bTvDxjGQGM2J3hOnVhfNmFP1y8DK0tySiD_EXe5_xWE,200
- castor_extractor/warehouse/databricks/client.py,sha256=oHR_htE25p5tiAAFZKbF48efo7tqIENW4dAGA7yEqHg,16895
+ castor_extractor/warehouse/databricks/__init__.py,sha256=YG3YSIJgCFRjjI8eExy9T7qGnfnjWhMFh8c15KTs_BA,184
+ castor_extractor/warehouse/databricks/client.py,sha256=pnYb6nl9U33nH6UukYP5piWGYF-m1SP2TYiWiUozM_4,20552
  castor_extractor/warehouse/databricks/client_test.py,sha256=KNp4Hi_CC6GwiW2QDJQQwqALfUebuT9D_qL6FuP_8tY,5246
- castor_extractor/warehouse/databricks/credentials.py,sha256=PpGv5_GP320UQjV_gvaxSpOw58AmqSznmjGhGfe6bdU,655
- castor_extractor/warehouse/databricks/extract.py,sha256=VX-3uo5dZucenrg-wnPur3CxOgpC5H7Ds92TO7OTAjc,7379
- castor_extractor/warehouse/databricks/format.py,sha256=2bRy2fa45NW3uk030rmyba4n2Em-NnyZPBurUslEbcw,5522
- castor_extractor/warehouse/databricks/format_test.py,sha256=iPmdJof43fBYL1Sa_fBrCWDQHCHgm7IWCZag1kWkj9E,1970
+ castor_extractor/warehouse/databricks/credentials.py,sha256=iphbVynVTQXMEbJy4QaT5fer-GpOi7QtbAlg8R7-Lj4,598
+ castor_extractor/warehouse/databricks/extract.py,sha256=VYygE06f7ngYWVlRa48O6drLIZF-_4IBJdyXTYfxZQU,7395
+ castor_extractor/warehouse/databricks/format.py,sha256=p252NFzQN1uZdsu5wpP-bMHK0rBBVzallX3-o92Mvh4,6744
+ castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0zdca7Pms4seCHimbw8NAoAI,3322
  castor_extractor/warehouse/databricks/test_constants.py,sha256=Hm96yq_ltVAKv7WYhYz637r4Cuj-1cCdyOuxMEe3J-Q,2246
  castor_extractor/warehouse/databricks/types.py,sha256=hD6gC8oiT3QSWEvbtgUOGK_lLzzz36sEauB3lS_wxlE,218
+ castor_extractor/warehouse/databricks/utils.py,sha256=RWRViqLaj2K0in5T5F6OLp7HCm554BCh3zi4CJqOEt8,576
+ castor_extractor/warehouse/databricks/utils_test.py,sha256=5Qrd_tLNLWrDHX2uQyVUf0vqXJzD44uQGGxDBOkwvUU,503
  castor_extractor/warehouse/mysql/__init__.py,sha256=2KFDogo9GNbApHqw3Vm5t_uNmIRjdp76nmP_WQQMfQY,116
  castor_extractor/warehouse/mysql/client.py,sha256=IwoJvbmE5VZkMCP9yHf6ta3_AQPEuBPrZZ3meefbcJs,974
  castor_extractor/warehouse/mysql/client_test.py,sha256=wRTv-3c5chy_HKj-buasNiYOOCIfynYqbabM4Hxdh5E,1052
@@ -370,8 +372,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
  castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
  castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
  castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
- castor_extractor-0.17.0.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
- castor_extractor-0.17.0.dist-info/METADATA,sha256=mPiUyxCqXFifcPbhcOPFsnkPAV4OcWXoYzGeUKlbkoo,6582
- castor_extractor-0.17.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- castor_extractor-0.17.0.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
- castor_extractor-0.17.0.dist-info/RECORD,,
+ castor_extractor-0.17.3.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+ castor_extractor-0.17.3.dist-info/METADATA,sha256=v_xgS6DS7FC1kVzma9Z69XiMOjDn6BHckEJJ-rJ5TZI,6985
+ castor_extractor-0.17.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ castor_extractor-0.17.3.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
+ castor_extractor-0.17.3.dist-info/RECORD,,