castor-extractor 0.24.52__py3-none-any.whl → 0.24.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.24.55 - 2025-09-19
4
+
5
+ * Fix encoding in LocalStorage - force to utf-8
6
+
7
+ ## 0.24.54 - 2025-09-18
8
+
9
+ * SqlServer: fix typo in the extraction query of schemas
10
+
11
+ ## 0.24.53 - 2025-09-18
12
+
13
+ * Fix CSV field size to support running on Windows
14
+
3
15
  ## 0.24.52 - 2025-09-18
4
16
 
5
17
  * SqlServer : improve extraction of users and technical owners
@@ -26,7 +26,7 @@ from .collection import (
26
26
  group_by,
27
27
  mapping_from_rows,
28
28
  )
29
- from .constants import OUTPUT_DIR
29
+ from .constants import ENCODING_UTF8, OUTPUT_DIR
30
30
  from .deprecate import deprecate_python
31
31
  from .env import from_env
32
32
  from .files import explode, search_files
@@ -1 +1,2 @@
1
1
  OUTPUT_DIR = "CASTOR_OUTPUT_DIRECTORY"
2
+ ENCODING_UTF8 = "utf-8"
@@ -1,6 +1,7 @@
1
1
  """convert files to csv"""
2
2
 
3
3
  import csv
4
+ import ctypes
4
5
  import json
5
6
  import logging
6
7
  import re
@@ -21,6 +22,8 @@ CSV_OPTIONS: CsvOptions = {
21
22
  "quotechar": '"',
22
23
  }
23
24
 
25
+ CSV_FIELD_SIZE_MB = 100
26
+
24
27
  ScalarValue = Union[int, float, None, str]
25
28
 
26
29
  logger = logging.getLogger(__name__)
@@ -132,6 +135,31 @@ class Formatter(ABC):
132
135
  pass
133
136
 
134
137
 
138
+ def _set_csv_field_size_limit(target_limit_mb: int) -> None:
139
+ """
140
+ Safely set the maximum CSV field size limit across platforms.
141
+
142
+ This function wraps `csv.field_size_limit()` to avoid `OverflowError` on
143
+ Windows, where the maximum C long is only 32 bits (2^31 - 1). On Linux and
144
+ macOS, the C long is typically 64 bits, allowing much larger values.
145
+
146
+ The requested limit is specified in megabytes and converted to bytes.
147
+
148
+ It is then clamped to the maximum value supported by:
149
+ - the current platform's C long
150
+ - Python's `sys.maxsize`
151
+ - the requested target limit
152
+ """
153
+ target_limit_bytes = target_limit_mb * 1024**2
154
+
155
+ # max value of C long for the current platform
156
+ platform_c_long = (1 << (8 * ctypes.sizeof(ctypes.c_long) - 1)) - 1
157
+
158
+ limit_bytes = min(target_limit_bytes, sys.maxsize, platform_c_long)
159
+
160
+ csv.field_size_limit(limit_bytes)
161
+
162
+
135
163
  class CsvFormatter(Formatter):
136
164
  """
137
165
  Serialize/Deserialize CSV
@@ -141,9 +169,7 @@ class CsvFormatter(Formatter):
141
169
  return "csv"
142
170
 
143
171
  # increase the size limit (some fields are very large)
144
- # Limit to 100 MB. Value must be smaller than the C long maximum value.
145
- size_limit = min(sys.maxsize, 100 * 1024**3)
146
- csv.field_size_limit(size_limit)
172
+ _set_csv_field_size_limit(target_limit_mb=CSV_FIELD_SIZE_MB)
147
173
 
148
174
  @staticmethod
149
175
  def serialize(buffer: IO[str], data: Iterable[dict]) -> bool:
@@ -4,6 +4,7 @@ from collections.abc import Iterable, Iterator
4
4
  from io import StringIO
5
5
  from typing import Optional
6
6
 
7
+ from .constants import ENCODING_UTF8
7
8
  from .formatter import CsvFormatter, Formatter
8
9
  from .time import current_timestamp
9
10
  from .write import timestamped_filename
@@ -58,11 +59,11 @@ class LocalStorage(AbstractStorage):
58
59
 
59
60
  def put(self, name: str, data: Iterable[dict]) -> str:
60
61
  path = self.path(name)
61
- with open(path, "w") as file:
62
+ with open(path, "w", encoding=ENCODING_UTF8) as file:
62
63
  self._formatter.serialize(file, data)
63
64
  return path
64
65
 
65
66
  def get(self, name: str) -> Iterator[dict]:
66
67
  path = self.path(name)
67
- with open(path, "r") as file:
68
+ with open(path, "r", encoding=ENCODING_UTF8) as file:
68
69
  return self._formatter.deserialize(StringIO(file.read()))
@@ -3,11 +3,11 @@ import logging
3
3
  import os
4
4
  import sys
5
5
  from datetime import datetime
6
+ from importlib.metadata import version
6
7
  from typing import Any
7
8
 
8
- import pkg_resources
9
+ from ..utils import ENCODING_UTF8
9
10
 
10
- ENCODING = "utf8"
11
11
  SUMMARY_FILENAME = "summary.json"
12
12
 
13
13
  logger = logging.getLogger(__name__)
@@ -33,15 +33,14 @@ def write_json(filename: str, data: Any):
33
33
  """
34
34
  write the data to a json file at path filename
35
35
  """
36
- with open(filename, "w", encoding=ENCODING) as f:
36
+ with open(filename, "w", encoding=ENCODING_UTF8) as f:
37
37
  json.dump(data, f)
38
38
  logger.info(f"Wrote output file: {filename} ({f.tell()} bytes)")
39
39
 
40
40
 
41
41
  def _current_version() -> str:
42
42
  """fetch the current version of castor extractor running"""
43
- packages = pkg_resources.require("castor-extractor")
44
- return str(packages[0].version)
43
+ return version("castor-extractor")
45
44
 
46
45
 
47
46
  def write_summary(output_directory: str, ts: int, **kwargs):
@@ -17,7 +17,7 @@ SELECT
17
17
  schema_name = s.name,
18
18
  schema_id = CAST(d.database_id AS VARCHAR(10)) + '_' + CAST(s.schema_id AS VARCHAR(10)),
19
19
  schema_owner = u.name,
20
- schema_owner_id = u.uid
20
+ schema_owner_id = u.name
21
21
  FROM [{database}].sys.schemas AS s
22
22
  INNER JOIN ids AS i
23
23
  ON s.name = i.table_schema
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.24.52
3
+ Version: 0.24.55
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -36,7 +36,7 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
36
36
  Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
37
37
  Requires-Dist: google-auth (>=2,<3)
38
38
  Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
39
- Requires-Dist: google-cloud-storage (>=2,<3)
39
+ Requires-Dist: google-cloud-storage (>=3.1.0,<4.0.0)
40
40
  Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
41
41
  Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
42
42
  Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
@@ -215,6 +215,18 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
215
215
 
216
216
  # Changelog
217
217
 
218
+ ## 0.24.55 - 2025-09-19
219
+
220
+ * Fix encoding in LocalStorage - force to utf-8
221
+
222
+ ## 0.24.54 - 2025-09-18
223
+
224
+ * SqlServer: fix typo in the extraction query of schemas
225
+
226
+ ## 0.24.53 - 2025-09-18
227
+
228
+ * Fix CSV field size to support running on Windows
229
+
218
230
  ## 0.24.52 - 2025-09-18
219
231
 
220
232
  * SqlServer : improve extraction of users and technical owners
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=ruS47cNmG5EMJYGYtkGGyhh7A5NgNz4TxzS8h0lP_Co,20477
1
+ CHANGELOG.md,sha256=y8BAidkUDrMoQLEfu3LJLiqxoEUzI5hJZs4CUN_e1H0,20711
2
2
  Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
3
3
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
4
4
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -95,7 +95,7 @@ castor_extractor/uploader/settings.py,sha256=sUZpg9eHemM99DMrBW8bnlMuoTmCmLCKq-D
95
95
  castor_extractor/uploader/upload.py,sha256=b2g9vWWjXWbt8Ms7brTc7OK_I7Z-1VSibNbppGoB2oQ,4764
96
96
  castor_extractor/uploader/upload_test.py,sha256=UgN7TnT9Chn6KVzRcAX0Tuvp7-tps3ugxGitlgb9TSY,462
97
97
  castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
98
- castor_extractor/utils/__init__.py,sha256=z_BdKTUyuug3I5AzCuSGrAVskfLax4_olfORIjhZw_M,1691
98
+ castor_extractor/utils/__init__.py,sha256=LhcSQe50m5iv2zlEPfJUa3VY4pDcoMctqLByAWNV7As,1706
99
99
  castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
100
100
  castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
101
101
  castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
@@ -119,12 +119,12 @@ castor_extractor/utils/client/uri.py,sha256=jmP9hY-6PRqdc3-vAOdtll_U6q9VCqSqmBAN
119
119
  castor_extractor/utils/client/uri_test.py,sha256=1XKF6qSseCeD4G4ckaNO07JXfGbt7XUVinOZdpEYrDQ,259
120
120
  castor_extractor/utils/collection.py,sha256=g2HmB0ievvYHWaZ8iEzkcPPkrBFsh6R6b_liBqcsMjc,3044
121
121
  castor_extractor/utils/collection_test.py,sha256=mlw33u4VidazQwWxJMvaFeYX3VB5CAj6rqRG-cRsLrw,2884
122
- castor_extractor/utils/constants.py,sha256=qBQprS9U66mS-RIBXiLujdTSV3WvGv40Bc0khP4Abdk,39
122
+ castor_extractor/utils/constants.py,sha256=xEUk-B__cqHPKz5_Ta9kHIsiR-a9qTXzpsTY-SzPRHo,63
123
123
  castor_extractor/utils/deprecate.py,sha256=aBIN2QqZUx5CBNZMFfOUhi8QqtPqRcJtmrN6xqfm-y8,805
124
124
  castor_extractor/utils/env.py,sha256=TqdtB50U8LE0993WhhEhpy89TJrHbjtIKjvg6KQ-5q0,596
125
125
  castor_extractor/utils/files.py,sha256=qKbfu5FRjsQdKnRmaJNd5EdX_F6gf5C5tV8LdoYKxs0,1527
126
126
  castor_extractor/utils/files_test.py,sha256=omRT3XSjaSAywYUoLh1SGWqYzl4UwBYKSYA9_7mXd_E,1542
127
- castor_extractor/utils/formatter.py,sha256=KFUIiT1mSMS464trrpneFC2aIC0MjocHS0BLzX0WkIk,4969
127
+ castor_extractor/utils/formatter.py,sha256=wys8LD5sB39g37aqIpJ3X-YXVspcsUmcuUFvZA4ODHg,5811
128
128
  castor_extractor/utils/formatter_test.csv,sha256=UCNqPs8-xrY1AdMSpuctVFXInQe3Z_EABP4rF-Jw5ks,3802
129
129
  castor_extractor/utils/formatter_test.json,sha256=yPP_z1ZEavaUskC-Hx33uGlwKoInHYOFKqsJ9NgwIFo,12527
130
130
  castor_extractor/utils/formatter_test.py,sha256=VPlRTPQOaAeCySNs1wU1jd3bMppqxkVpD1dyCLt6p94,1856
@@ -148,7 +148,7 @@ castor_extractor/utils/salesforce/constants.py,sha256=7yPmUeyn4IHQiHLDutXE0L_OBd
148
148
  castor_extractor/utils/salesforce/credentials.py,sha256=m_11LIaBrYVgH2bLo-QnxaIY5KhEdtfVXz9r2lb_fd0,1123
149
149
  castor_extractor/utils/salesforce/credentials_test.py,sha256=FQRyNk2Jsh6KtYiW20oL43CVnGjXLcAjdFATkE7jK0s,586
150
150
  castor_extractor/utils/salesforce/pagination.py,sha256=wJq0rKLdacFRggyHwB6Fh3K6iXPvL4QWhsDvZdjQjM8,849
151
- castor_extractor/utils/store.py,sha256=hnyrFwCsL48e9QrsBns-n8FospujZrkUy1P2YHAh_C0,2067
151
+ castor_extractor/utils/store.py,sha256=KAg5TzLd8jak1Gh5NK-iPu2buQIYNofx6lpXye7MRDU,2152
152
152
  castor_extractor/utils/string.py,sha256=IQqNum7CJwuSvDGPbTAmz46YwtYDYgJKeXY7iixdjI4,2370
153
153
  castor_extractor/utils/string_test.py,sha256=u3P2tAPhyfCLvD19rH_JcpHhPuWTHUdg0z_N_-Kxwno,2501
154
154
  castor_extractor/utils/time.py,sha256=jmP1QWg4lv21Jp_Oy71lfJ47hjNOSgHiBOFf964RMPU,1732
@@ -158,7 +158,7 @@ castor_extractor/utils/url.py,sha256=0YaKAz3EC5PgTb5A2TNOlxf1DANK40yw6hs7ArEtJaU
158
158
  castor_extractor/utils/url_test.py,sha256=LWzNdOZqjrDeLmvhPBYmP35mzhm7jGAXi021thiro1Y,1425
159
159
  castor_extractor/utils/validation.py,sha256=dRvC9SoFVecVZuLQNN3URq37yX2sBSW3-NxIxkcol5o,1894
160
160
  castor_extractor/utils/validation_test.py,sha256=A7P6VmI0kYX2aGIeEN12y7LsY7Kpm8pE4bdVFhbBAMw,1184
161
- castor_extractor/utils/write.py,sha256=Z_RYm47XeHiUPPUMYMuAjQrVZ18CAkL3daQHQG1XPlM,2148
161
+ castor_extractor/utils/write.py,sha256=KQVWF29N766avzmSb129IUWrId5c_8BtnYhVLmU6YIs,2133
162
162
  castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
163
163
  castor_extractor/visualization/domo/__init__.py,sha256=1axOCPm4RpdIyUt9LQEvlMvbOPllW8rk63h6EjVgJ0Y,111
164
164
  castor_extractor/visualization/domo/assets.py,sha256=bK1urFR2tnlWkVkkhR32mAKMoKbESNlop-CNGx-65PY,206
@@ -428,14 +428,14 @@ castor_extractor/warehouse/sqlserver/queries/.sqlfluff,sha256=yy0KQdz8I_67vnXyX8
428
428
  castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=ojiUQQnHXdWMbgaYOcxKBiwfi7rtu_tyamK6r4t4IBM,2929
429
429
  castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4dPeBCn85MEOXr1f-DPXxiI3RvvoE_1n8lsbTs26E0I,150
430
430
  castor_extractor/warehouse/sqlserver/queries/query.sql,sha256=bkENw7QovlG4MyYe5q3XNPs3ajUr_3bNzpbm0Y2upYo,821
431
- castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=jUnZ10kIZk44nKQ5KiyjZ0YFdypYQj__OlPDRq71EAw,909
431
+ castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=Fq_8-tCnArayON3fjd2oMWM9nuYaXX3aZMr5jOOfMuw,910
432
432
  castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=ggzatJOlOfGkMG1NS-hD-n1-3WLbV9Yh8IsQrEFO5X4,2831
433
433
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=MAlnTis43E3Amu1e1Oz_qhaX8Bz-iN0Lrbf9RiohX7Y,99
434
434
  castor_extractor/warehouse/sqlserver/queries/view_ddl.sql,sha256=9rynvx6MWg3iZzrWPB7haZfVKEPkxulzryE2g19x804,315
435
435
  castor_extractor/warehouse/sqlserver/query.py,sha256=c8f7_SEMR17DhbtzuYphWqWDQ0sCRy-nR442RRBZVYw,1773
436
436
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
437
- castor_extractor-0.24.52.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
438
- castor_extractor-0.24.52.dist-info/METADATA,sha256=0wd_HdsZnM75f8hXza9FNIvjipmHGUDHOe5yjIYX1Ig,27930
439
- castor_extractor-0.24.52.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
440
- castor_extractor-0.24.52.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
441
- castor_extractor-0.24.52.dist-info/RECORD,,
437
+ castor_extractor-0.24.55.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
438
+ castor_extractor-0.24.55.dist-info/METADATA,sha256=MhFCdByqa4_T7A4-Mb96-ISq07W6BP7M-RHgjSfI8iY,28172
439
+ castor_extractor-0.24.55.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
440
+ castor_extractor-0.24.55.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
441
+ castor_extractor-0.24.55.dist-info/RECORD,,