castor-extractor 0.24.52__py3-none-any.whl → 0.24.55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +12 -0
- castor_extractor/utils/__init__.py +1 -1
- castor_extractor/utils/constants.py +1 -0
- castor_extractor/utils/formatter.py +29 -3
- castor_extractor/utils/store.py +3 -2
- castor_extractor/utils/write.py +4 -5
- castor_extractor/warehouse/sqlserver/queries/schema.sql +1 -1
- {castor_extractor-0.24.52.dist-info → castor_extractor-0.24.55.dist-info}/METADATA +14 -2
- {castor_extractor-0.24.52.dist-info → castor_extractor-0.24.55.dist-info}/RECORD +12 -12
- {castor_extractor-0.24.52.dist-info → castor_extractor-0.24.55.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.52.dist-info → castor_extractor-0.24.55.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.52.dist-info → castor_extractor-0.24.55.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.55 - 2025-09-19
|
|
4
|
+
|
|
5
|
+
* Fix encoding in LocalStorage - force to utf-8
|
|
6
|
+
|
|
7
|
+
## 0.24.54 - 2025-09-18
|
|
8
|
+
|
|
9
|
+
* SqlServer: fix typo in the extraction query of schemas
|
|
10
|
+
|
|
11
|
+
## 0.24.53 - 2025-09-18
|
|
12
|
+
|
|
13
|
+
* Fix CSV field size to support running on Windows
|
|
14
|
+
|
|
3
15
|
## 0.24.52 - 2025-09-18
|
|
4
16
|
|
|
5
17
|
* SqlServer : improve extraction of users and technical owners
|
|
@@ -26,7 +26,7 @@ from .collection import (
|
|
|
26
26
|
group_by,
|
|
27
27
|
mapping_from_rows,
|
|
28
28
|
)
|
|
29
|
-
from .constants import OUTPUT_DIR
|
|
29
|
+
from .constants import ENCODING_UTF8, OUTPUT_DIR
|
|
30
30
|
from .deprecate import deprecate_python
|
|
31
31
|
from .env import from_env
|
|
32
32
|
from .files import explode, search_files
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""convert files to csv"""
|
|
2
2
|
|
|
3
3
|
import csv
|
|
4
|
+
import ctypes
|
|
4
5
|
import json
|
|
5
6
|
import logging
|
|
6
7
|
import re
|
|
@@ -21,6 +22,8 @@ CSV_OPTIONS: CsvOptions = {
|
|
|
21
22
|
"quotechar": '"',
|
|
22
23
|
}
|
|
23
24
|
|
|
25
|
+
CSV_FIELD_SIZE_MB = 100
|
|
26
|
+
|
|
24
27
|
ScalarValue = Union[int, float, None, str]
|
|
25
28
|
|
|
26
29
|
logger = logging.getLogger(__name__)
|
|
@@ -132,6 +135,31 @@ class Formatter(ABC):
|
|
|
132
135
|
pass
|
|
133
136
|
|
|
134
137
|
|
|
138
|
+
def _set_csv_field_size_limit(target_limit_mb: int) -> None:
|
|
139
|
+
"""
|
|
140
|
+
Safely set the maximum CSV field size limit across platforms.
|
|
141
|
+
|
|
142
|
+
This function wraps `csv.field_size_limit()` to avoid `OverflowError` on
|
|
143
|
+
Windows, where the maximum C long is only 32 bits (2^31 - 1). On Linux and
|
|
144
|
+
macOS, the C long is typically 64 bits, allowing much larger values.
|
|
145
|
+
|
|
146
|
+
The requested limit is specified in megabytes and converted to bytes.
|
|
147
|
+
|
|
148
|
+
It is then clamped to the maximum value supported by:
|
|
149
|
+
- the current platform's C long
|
|
150
|
+
- Python's `sys.maxsize`
|
|
151
|
+
- the requested target limit
|
|
152
|
+
"""
|
|
153
|
+
target_limit_bytes = target_limit_mb * 1024**2
|
|
154
|
+
|
|
155
|
+
# max value of C long for the current platform
|
|
156
|
+
platform_c_long = (1 << (8 * ctypes.sizeof(ctypes.c_long) - 1)) - 1
|
|
157
|
+
|
|
158
|
+
limit_bytes = min(target_limit_bytes, sys.maxsize, platform_c_long)
|
|
159
|
+
|
|
160
|
+
csv.field_size_limit(limit_bytes)
|
|
161
|
+
|
|
162
|
+
|
|
135
163
|
class CsvFormatter(Formatter):
|
|
136
164
|
"""
|
|
137
165
|
Serialize/Deserialize CSV
|
|
@@ -141,9 +169,7 @@ class CsvFormatter(Formatter):
|
|
|
141
169
|
return "csv"
|
|
142
170
|
|
|
143
171
|
# increase the size limit (some fields are very large)
|
|
144
|
-
|
|
145
|
-
size_limit = min(sys.maxsize, 100 * 1024**3)
|
|
146
|
-
csv.field_size_limit(size_limit)
|
|
172
|
+
_set_csv_field_size_limit(target_limit_mb=CSV_FIELD_SIZE_MB)
|
|
147
173
|
|
|
148
174
|
@staticmethod
|
|
149
175
|
def serialize(buffer: IO[str], data: Iterable[dict]) -> bool:
|
castor_extractor/utils/store.py
CHANGED
|
@@ -4,6 +4,7 @@ from collections.abc import Iterable, Iterator
|
|
|
4
4
|
from io import StringIO
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
|
+
from .constants import ENCODING_UTF8
|
|
7
8
|
from .formatter import CsvFormatter, Formatter
|
|
8
9
|
from .time import current_timestamp
|
|
9
10
|
from .write import timestamped_filename
|
|
@@ -58,11 +59,11 @@ class LocalStorage(AbstractStorage):
|
|
|
58
59
|
|
|
59
60
|
def put(self, name: str, data: Iterable[dict]) -> str:
|
|
60
61
|
path = self.path(name)
|
|
61
|
-
with open(path, "w") as file:
|
|
62
|
+
with open(path, "w", encoding=ENCODING_UTF8) as file:
|
|
62
63
|
self._formatter.serialize(file, data)
|
|
63
64
|
return path
|
|
64
65
|
|
|
65
66
|
def get(self, name: str) -> Iterator[dict]:
|
|
66
67
|
path = self.path(name)
|
|
67
|
-
with open(path, "r") as file:
|
|
68
|
+
with open(path, "r", encoding=ENCODING_UTF8) as file:
|
|
68
69
|
return self._formatter.deserialize(StringIO(file.read()))
|
castor_extractor/utils/write.py
CHANGED
|
@@ -3,11 +3,11 @@ import logging
|
|
|
3
3
|
import os
|
|
4
4
|
import sys
|
|
5
5
|
from datetime import datetime
|
|
6
|
+
from importlib.metadata import version
|
|
6
7
|
from typing import Any
|
|
7
8
|
|
|
8
|
-
import
|
|
9
|
+
from ..utils import ENCODING_UTF8
|
|
9
10
|
|
|
10
|
-
ENCODING = "utf8"
|
|
11
11
|
SUMMARY_FILENAME = "summary.json"
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
@@ -33,15 +33,14 @@ def write_json(filename: str, data: Any):
|
|
|
33
33
|
"""
|
|
34
34
|
write the data to a json file at path filename
|
|
35
35
|
"""
|
|
36
|
-
with open(filename, "w", encoding=ENCODING) as f:
|
|
36
|
+
with open(filename, "w", encoding=ENCODING_UTF8) as f:
|
|
37
37
|
json.dump(data, f)
|
|
38
38
|
logger.info(f"Wrote output file: {filename} ({f.tell()} bytes)")
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
def _current_version() -> str:
|
|
42
42
|
"""fetch the current version of castor extractor running"""
|
|
43
|
-
|
|
44
|
-
return str(packages[0].version)
|
|
43
|
+
return version("castor-extractor")
|
|
45
44
|
|
|
46
45
|
|
|
47
46
|
def write_summary(output_directory: str, ts: int, **kwargs):
|
|
@@ -17,7 +17,7 @@ SELECT
|
|
|
17
17
|
schema_name = s.name,
|
|
18
18
|
schema_id = CAST(d.database_id AS VARCHAR(10)) + '_' + CAST(s.schema_id AS VARCHAR(10)),
|
|
19
19
|
schema_owner = u.name,
|
|
20
|
-
schema_owner_id = u.
|
|
20
|
+
schema_owner_id = u.name
|
|
21
21
|
FROM [{database}].sys.schemas AS s
|
|
22
22
|
INNER JOIN ids AS i
|
|
23
23
|
ON s.name = i.table_schema
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.52
|
|
3
|
+
Version: 0.24.55
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -36,7 +36,7 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
|
|
|
36
36
|
Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
|
|
37
37
|
Requires-Dist: google-auth (>=2,<3)
|
|
38
38
|
Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
|
|
39
|
-
Requires-Dist: google-cloud-storage (>=
|
|
39
|
+
Requires-Dist: google-cloud-storage (>=3.1.0,<4.0.0)
|
|
40
40
|
Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
|
|
41
41
|
Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
|
|
42
42
|
Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
|
|
@@ -215,6 +215,18 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
|
|
|
215
215
|
|
|
216
216
|
# Changelog
|
|
217
217
|
|
|
218
|
+
## 0.24.55 - 2025-09-19
|
|
219
|
+
|
|
220
|
+
* Fix encoding in LocalStorage - force to utf-8
|
|
221
|
+
|
|
222
|
+
## 0.24.54 - 2025-09-18
|
|
223
|
+
|
|
224
|
+
* SqlServer: fix typo in the extraction query of schemas
|
|
225
|
+
|
|
226
|
+
## 0.24.53 - 2025-09-18
|
|
227
|
+
|
|
228
|
+
* Fix CSV field size to support running on Windows
|
|
229
|
+
|
|
218
230
|
## 0.24.52 - 2025-09-18
|
|
219
231
|
|
|
220
232
|
* SqlServer : improve extraction of users and technical owners
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=y8BAidkUDrMoQLEfu3LJLiqxoEUzI5hJZs4CUN_e1H0,20711
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -95,7 +95,7 @@ castor_extractor/uploader/settings.py,sha256=sUZpg9eHemM99DMrBW8bnlMuoTmCmLCKq-D
|
|
|
95
95
|
castor_extractor/uploader/upload.py,sha256=b2g9vWWjXWbt8Ms7brTc7OK_I7Z-1VSibNbppGoB2oQ,4764
|
|
96
96
|
castor_extractor/uploader/upload_test.py,sha256=UgN7TnT9Chn6KVzRcAX0Tuvp7-tps3ugxGitlgb9TSY,462
|
|
97
97
|
castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
|
|
98
|
-
castor_extractor/utils/__init__.py,sha256=
|
|
98
|
+
castor_extractor/utils/__init__.py,sha256=LhcSQe50m5iv2zlEPfJUa3VY4pDcoMctqLByAWNV7As,1706
|
|
99
99
|
castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
|
|
100
100
|
castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
|
|
101
101
|
castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
|
|
@@ -119,12 +119,12 @@ castor_extractor/utils/client/uri.py,sha256=jmP9hY-6PRqdc3-vAOdtll_U6q9VCqSqmBAN
|
|
|
119
119
|
castor_extractor/utils/client/uri_test.py,sha256=1XKF6qSseCeD4G4ckaNO07JXfGbt7XUVinOZdpEYrDQ,259
|
|
120
120
|
castor_extractor/utils/collection.py,sha256=g2HmB0ievvYHWaZ8iEzkcPPkrBFsh6R6b_liBqcsMjc,3044
|
|
121
121
|
castor_extractor/utils/collection_test.py,sha256=mlw33u4VidazQwWxJMvaFeYX3VB5CAj6rqRG-cRsLrw,2884
|
|
122
|
-
castor_extractor/utils/constants.py,sha256=
|
|
122
|
+
castor_extractor/utils/constants.py,sha256=xEUk-B__cqHPKz5_Ta9kHIsiR-a9qTXzpsTY-SzPRHo,63
|
|
123
123
|
castor_extractor/utils/deprecate.py,sha256=aBIN2QqZUx5CBNZMFfOUhi8QqtPqRcJtmrN6xqfm-y8,805
|
|
124
124
|
castor_extractor/utils/env.py,sha256=TqdtB50U8LE0993WhhEhpy89TJrHbjtIKjvg6KQ-5q0,596
|
|
125
125
|
castor_extractor/utils/files.py,sha256=qKbfu5FRjsQdKnRmaJNd5EdX_F6gf5C5tV8LdoYKxs0,1527
|
|
126
126
|
castor_extractor/utils/files_test.py,sha256=omRT3XSjaSAywYUoLh1SGWqYzl4UwBYKSYA9_7mXd_E,1542
|
|
127
|
-
castor_extractor/utils/formatter.py,sha256=
|
|
127
|
+
castor_extractor/utils/formatter.py,sha256=wys8LD5sB39g37aqIpJ3X-YXVspcsUmcuUFvZA4ODHg,5811
|
|
128
128
|
castor_extractor/utils/formatter_test.csv,sha256=UCNqPs8-xrY1AdMSpuctVFXInQe3Z_EABP4rF-Jw5ks,3802
|
|
129
129
|
castor_extractor/utils/formatter_test.json,sha256=yPP_z1ZEavaUskC-Hx33uGlwKoInHYOFKqsJ9NgwIFo,12527
|
|
130
130
|
castor_extractor/utils/formatter_test.py,sha256=VPlRTPQOaAeCySNs1wU1jd3bMppqxkVpD1dyCLt6p94,1856
|
|
@@ -148,7 +148,7 @@ castor_extractor/utils/salesforce/constants.py,sha256=7yPmUeyn4IHQiHLDutXE0L_OBd
|
|
|
148
148
|
castor_extractor/utils/salesforce/credentials.py,sha256=m_11LIaBrYVgH2bLo-QnxaIY5KhEdtfVXz9r2lb_fd0,1123
|
|
149
149
|
castor_extractor/utils/salesforce/credentials_test.py,sha256=FQRyNk2Jsh6KtYiW20oL43CVnGjXLcAjdFATkE7jK0s,586
|
|
150
150
|
castor_extractor/utils/salesforce/pagination.py,sha256=wJq0rKLdacFRggyHwB6Fh3K6iXPvL4QWhsDvZdjQjM8,849
|
|
151
|
-
castor_extractor/utils/store.py,sha256=
|
|
151
|
+
castor_extractor/utils/store.py,sha256=KAg5TzLd8jak1Gh5NK-iPu2buQIYNofx6lpXye7MRDU,2152
|
|
152
152
|
castor_extractor/utils/string.py,sha256=IQqNum7CJwuSvDGPbTAmz46YwtYDYgJKeXY7iixdjI4,2370
|
|
153
153
|
castor_extractor/utils/string_test.py,sha256=u3P2tAPhyfCLvD19rH_JcpHhPuWTHUdg0z_N_-Kxwno,2501
|
|
154
154
|
castor_extractor/utils/time.py,sha256=jmP1QWg4lv21Jp_Oy71lfJ47hjNOSgHiBOFf964RMPU,1732
|
|
@@ -158,7 +158,7 @@ castor_extractor/utils/url.py,sha256=0YaKAz3EC5PgTb5A2TNOlxf1DANK40yw6hs7ArEtJaU
|
|
|
158
158
|
castor_extractor/utils/url_test.py,sha256=LWzNdOZqjrDeLmvhPBYmP35mzhm7jGAXi021thiro1Y,1425
|
|
159
159
|
castor_extractor/utils/validation.py,sha256=dRvC9SoFVecVZuLQNN3URq37yX2sBSW3-NxIxkcol5o,1894
|
|
160
160
|
castor_extractor/utils/validation_test.py,sha256=A7P6VmI0kYX2aGIeEN12y7LsY7Kpm8pE4bdVFhbBAMw,1184
|
|
161
|
-
castor_extractor/utils/write.py,sha256=
|
|
161
|
+
castor_extractor/utils/write.py,sha256=KQVWF29N766avzmSb129IUWrId5c_8BtnYhVLmU6YIs,2133
|
|
162
162
|
castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
163
163
|
castor_extractor/visualization/domo/__init__.py,sha256=1axOCPm4RpdIyUt9LQEvlMvbOPllW8rk63h6EjVgJ0Y,111
|
|
164
164
|
castor_extractor/visualization/domo/assets.py,sha256=bK1urFR2tnlWkVkkhR32mAKMoKbESNlop-CNGx-65PY,206
|
|
@@ -428,14 +428,14 @@ castor_extractor/warehouse/sqlserver/queries/.sqlfluff,sha256=yy0KQdz8I_67vnXyX8
|
|
|
428
428
|
castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=ojiUQQnHXdWMbgaYOcxKBiwfi7rtu_tyamK6r4t4IBM,2929
|
|
429
429
|
castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4dPeBCn85MEOXr1f-DPXxiI3RvvoE_1n8lsbTs26E0I,150
|
|
430
430
|
castor_extractor/warehouse/sqlserver/queries/query.sql,sha256=bkENw7QovlG4MyYe5q3XNPs3ajUr_3bNzpbm0Y2upYo,821
|
|
431
|
-
castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=
|
|
431
|
+
castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=Fq_8-tCnArayON3fjd2oMWM9nuYaXX3aZMr5jOOfMuw,910
|
|
432
432
|
castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=ggzatJOlOfGkMG1NS-hD-n1-3WLbV9Yh8IsQrEFO5X4,2831
|
|
433
433
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=MAlnTis43E3Amu1e1Oz_qhaX8Bz-iN0Lrbf9RiohX7Y,99
|
|
434
434
|
castor_extractor/warehouse/sqlserver/queries/view_ddl.sql,sha256=9rynvx6MWg3iZzrWPB7haZfVKEPkxulzryE2g19x804,315
|
|
435
435
|
castor_extractor/warehouse/sqlserver/query.py,sha256=c8f7_SEMR17DhbtzuYphWqWDQ0sCRy-nR442RRBZVYw,1773
|
|
436
436
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
437
|
-
castor_extractor-0.24.
|
|
438
|
-
castor_extractor-0.24.
|
|
439
|
-
castor_extractor-0.24.
|
|
440
|
-
castor_extractor-0.24.
|
|
441
|
-
castor_extractor-0.24.
|
|
437
|
+
castor_extractor-0.24.55.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
438
|
+
castor_extractor-0.24.55.dist-info/METADATA,sha256=MhFCdByqa4_T7A4-Mb96-ISq07W6BP7M-RHgjSfI8iY,28172
|
|
439
|
+
castor_extractor-0.24.55.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
440
|
+
castor_extractor-0.24.55.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
441
|
+
castor_extractor-0.24.55.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|