castor-extractor 0.15.4__py3-none-any.whl → 0.16.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.16.1 - 2024-04-02
4
+
5
+ * Systematically escape nul bytes on CSV write
6
+
7
+ ## 0.16.0 - 2024-03-26
8
+
9
+ * Use pydantic v2
3
10
 
4
11
  ## 0.15.4 - 2024-03-25
5
12
 
Dockerfile CHANGED
@@ -1,6 +1,6 @@
1
1
  # syntax=docker/dockerfile:1.5
2
2
 
3
- FROM --platform=linux/amd64 python:3.10-slim
3
+ FROM --platform=linux/amd64 python:3.11-slim
4
4
 
5
5
  ARG EXTRA
6
6
  ENV EXTRA=${EXTRA}
@@ -89,7 +89,7 @@ def test_DbtClient_list_job_identifiers():
89
89
 
90
90
  with patch(infer_path, return_value=40), patch(call_path) as mocked_call:
91
91
  mocked_call.return_value = jobs
92
- credentials = DbtCredentials(token="some-token", job_id=1)
92
+ credentials = DbtCredentials(token="some-token", job_id="1")
93
93
  dbt_client = DbtClient(credentials=credentials)
94
94
 
95
95
  jobs_ids = dbt_client.list_job_identifiers()
@@ -103,7 +103,7 @@ def test_DbtClient_fetch_artifacts():
103
103
  url = "https://cloud.getdbt.com/api/v2/accounts/40/runs/{}/artifacts/{}"
104
104
 
105
105
  with patch(infer_path, return_value=40), patch(call_path) as mocked_call:
106
- credentials = DbtCredentials(token="some-token", job_id=1)
106
+ credentials = DbtCredentials(token="some-token", job_id="1")
107
107
  dbt_client = DbtClient(credentials=credentials)
108
108
 
109
109
  dbt_client.fetch_run_results(run_id)
@@ -1,16 +1,15 @@
1
- from pydantic import BaseSettings, Extra, Field
1
+ from pydantic import Field
2
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
3
 
3
- _DEFAULT_CLOUD_URL = "https://cloud.getdbt.com"
4
+ DEFAULT_DBT_CLOUD_URL = "https://cloud.getdbt.com"
4
5
 
5
6
 
6
7
  class DbtCredentials(BaseSettings):
7
8
  """dbt credentials: host has default value"""
8
9
 
9
- host: str = Field(default=_DEFAULT_CLOUD_URL, env="CASTOR_DBT_HOST")
10
- job_id: str = Field(..., env="CASTOR_DBT_JOB_ID")
11
- token: str = Field(..., env="CASTOR_DBT_TOKEN")
12
-
13
- class Config:
14
- """constructor settings: ignore extra kwargs provided"""
15
-
16
- extra = Extra.ignore
10
+ host: str = Field(
11
+ default=DEFAULT_DBT_CLOUD_URL, validation_alias="CASTOR_DBT_HOST"
12
+ )
13
+ job_id: str = Field(..., validation_alias="CASTOR_DBT_JOB_ID")
14
+ token: str = Field(..., validation_alias="CASTOR_DBT_TOKEN")
15
+ model_config = SettingsConfigDict(extra="ignore", populate_by_name=True)
@@ -30,7 +30,14 @@ def _header(row: dict) -> Sequence[str]:
30
30
 
31
31
 
32
32
  def _scalar(value: Any) -> ScalarValue:
33
- if isinstance(value, (int, float, str)):
33
+ if isinstance(value, str):
34
+ if "\x00" in value: # infrequent error caused by bad encoding
35
+ value = remove_unsupported_byte(value)
36
+ logger.warning("Removed unsupported byte to write to csv")
37
+ return value
38
+
39
+ return value
40
+ if isinstance(value, (int, float)):
34
41
  return value
35
42
  if isinstance(value, (date, datetime)):
36
43
  return value.isoformat()
@@ -46,11 +53,11 @@ def _row(header: Sequence[str], row: dict) -> List[ScalarValue]:
46
53
  return [_scalar(row.get(h)) for h in header]
47
54
 
48
55
 
49
- def remove_unsupported_byte(row: List[ScalarValue]) -> List[ScalarValue]:
50
- return [
51
- re.sub("\x00", "", element) if isinstance(element, str) else element
52
- for element in row
53
- ]
56
+ def remove_unsupported_byte(element: ScalarValue) -> ScalarValue:
57
+ if not isinstance(element, str):
58
+ return element
59
+
60
+ return re.sub("\x00", "", element)
54
61
 
55
62
 
56
63
  def to_string_array(arr_json: str) -> List[str]:
@@ -85,11 +92,7 @@ def to_csv(buffer: IO[str], data: Iterable[dict]) -> bool:
85
92
  header = _header(row)
86
93
  writer.writerow(header)
87
94
  converted = _row(header, row)
88
- try:
89
- writer.writerow(converted)
90
- except csv.Error: # infrequent error caused by bad encoding
91
- writer.writerow(remove_unsupported_byte(converted))
92
- logger.warning("Removed unsupported byte to write to csv")
95
+ writer.writerow(converted)
93
96
  return True
94
97
 
95
98
 
@@ -68,7 +68,10 @@ def test__json_formatter():
68
68
  _test(formatter)
69
69
 
70
70
 
71
- def test__remove_unsupported_byte():
72
- row = [1, "foo", "bar\x00bie"]
73
- cleaned = remove_unsupported_byte(row)
74
- assert cleaned == [1, "foo", "barbie"]
71
+ @pytest.mark.parametrize(
72
+ "element, expected_output",
73
+ [(1, 1), ("foo", "foo"), ("bar\x00bie", "barbie")],
74
+ )
75
+ def test__remove_unsupported_byte(element, expected_output):
76
+ cleaned = remove_unsupported_byte(element)
77
+ assert cleaned == expected_output
@@ -1,22 +1,20 @@
1
+ from dataclasses import field
1
2
  from typing import List, Optional
2
3
 
4
+ from pydantic.dataclasses import dataclass
5
+
3
6
  from .constants import Urls
4
7
 
5
8
 
9
+ @dataclass
6
10
  class Credentials:
7
11
  """Class to handle PowerBI rest API permissions"""
8
12
 
9
- def __init__(
10
- self,
11
- *,
12
- tenant_id: str,
13
- client_id: str,
14
- secret: str,
15
- scopes: Optional[List[str]] = None,
16
- ):
17
- if scopes is None:
18
- scopes = [Urls.DEFAULT_SCOPE]
19
- self.tenant_id = tenant_id
20
- self.client_id = client_id
21
- self.secret = secret
22
- self.scopes = scopes
13
+ client_id: str
14
+ tenant_id: str
15
+ secret: str = field(metadata={"sensitive": True})
16
+ scopes: Optional[List[str]] = None
17
+
18
+ def __post_init__(self):
19
+ if self.scopes is None:
20
+ self.scopes = [Urls.DEFAULT_SCOPE]
@@ -273,6 +273,10 @@ class Client:
273
273
  self._wait_for_scan_result(scan_id)
274
274
  yield self._get_scan(scan_id)
275
275
 
276
+ def test_connection(self) -> None:
277
+ """Use credentials & verify requesting the API doesn't raise an error"""
278
+ self._header()
279
+
276
280
  def fetch(
277
281
  self,
278
282
  asset: PowerBiAsset,
@@ -4,6 +4,7 @@ from typing import Dict, Optional
4
4
  from ...utils import AbstractStorage, LocalStorage, write_summary
5
5
  from ..abstract import (
6
6
  CATALOG_ASSETS,
7
+ EXTERNAL_LINEAGE_ASSETS,
7
8
  QUERIES_ASSETS,
8
9
  VIEWS_ASSETS,
9
10
  SupportedAssets,
@@ -20,6 +21,7 @@ DATABRICKS_ASSETS: SupportedAssets = {
20
21
  WarehouseAssetGroup.QUERY: QUERIES_ASSETS,
21
22
  WarehouseAssetGroup.ROLE: (WarehouseAsset.USER,),
22
23
  WarehouseAssetGroup.VIEW_DDL: VIEWS_ASSETS,
24
+ WarehouseAssetGroup.EXTERNAL_LINEAGE: EXTERNAL_LINEAGE_ASSETS,
23
25
  }
24
26
 
25
27
  logger = logging.getLogger(__name__)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.15.4
3
+ Version: 0.16.1
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3.10
16
16
  Classifier: Programming Language :: Python :: 3.11
17
17
  Provides-Extra: all
18
18
  Provides-Extra: bigquery
19
+ Provides-Extra: dbt
19
20
  Provides-Extra: looker
20
21
  Provides-Extra: metabase
21
22
  Provides-Extra: mysql
@@ -38,7 +39,8 @@ Requires-Dist: looker-sdk (>=22.4.0,<=23.0.0) ; extra == "looker" or extra == "a
38
39
  Requires-Dist: msal (>=1.20.0,<2.0.0) ; extra == "powerbi" or extra == "all"
39
40
  Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0) ; extra == "metabase" or extra == "postgres" or extra == "redshift" or extra == "all"
40
41
  Requires-Dist: pycryptodome (>=3.0.0,<4.0.0) ; extra == "metabase" or extra == "all"
41
- Requires-Dist: pydantic (>=1.10,<2.0)
42
+ Requires-Dist: pydantic (>=2.6,<3.0)
43
+ Requires-Dist: pydantic-settings (>=2.2,<3.0)
42
44
  Requires-Dist: pymssql (>=2.2.11,<3.0.0) ; extra == "sqlserver" or extra == "all"
43
45
  Requires-Dist: pymysql[rsa] (>=1.1.0,<2.0.0) ; extra == "mysql" or extra == "all"
44
46
  Requires-Dist: python-dateutil (>=2.0.0,<=3.0.0)
@@ -1,5 +1,5 @@
1
- CHANGELOG.md,sha256=8mmULip1bn-hnzxQ7MGWp8MXCp-O2RlUrtt1tg-qefg,9723
2
- Dockerfile,sha256=TC6hFjG3mvnt1nkw2EpaS42hRYaGA2YIPKgWhVSKTWc,303
1
+ CHANGELOG.md,sha256=t1xfX_GaaTJcrNGAJtvhbOZ-4fAeRdFKRH8eKil6xWM,9837
2
+ Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
3
3
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
4
4
  README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
5
5
  castor_extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -57,16 +57,16 @@ castor_extractor/utils/constants.py,sha256=qBQprS9U66mS-RIBXiLujdTSV3WvGv40Bc0kh
57
57
  castor_extractor/utils/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
58
58
  castor_extractor/utils/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
59
59
  castor_extractor/utils/dbt/client.py,sha256=xBjbT-p99TXY850ooEAgjNp33yfGDwjWJRbXzeJoVaI,5538
60
- castor_extractor/utils/dbt/client_test.py,sha256=h-IIrE2Fy0oD0uemjvJzof9xMO3AmkHciOVopKCNjJs,4529
61
- castor_extractor/utils/dbt/credentials.py,sha256=qkfs43_8pNqPGmKc7xv7sb1IhbheXzgwt6yYdjyomAo,479
60
+ castor_extractor/utils/dbt/client_test.py,sha256=FO_vpnECE-hoK0rZHbqDv17oaJj3-uPhFEqTrMPzUf4,4533
61
+ castor_extractor/utils/dbt/credentials.py,sha256=pGq7GqFQTw9TwN1DXSHC-0yJ2H6B_wMAbHyQTLqJVh0,543
62
62
  castor_extractor/utils/deprecate.py,sha256=_uzQiwHiz2yEqQeNMmzvVmBY46IgBhhEbGPhTrVjZU4,817
63
63
  castor_extractor/utils/env.py,sha256=TqdtB50U8LE0993WhhEhpy89TJrHbjtIKjvg6KQ-5q0,596
64
64
  castor_extractor/utils/files.py,sha256=3C_u7P-kSZoOABVaKsuaf8lEhldRRxyxD27-K18_dEU,1545
65
65
  castor_extractor/utils/files_test.py,sha256=omRT3XSjaSAywYUoLh1SGWqYzl4UwBYKSYA9_7mXd_E,1542
66
- castor_extractor/utils/formatter.py,sha256=v50OyVWPDn-QdBm-XxSNGQibPPePeJIrcDcK3TATLRM,4934
66
+ castor_extractor/utils/formatter.py,sha256=EJpwl5ff9zahbWpCcoHcrDTVzBSqwyZQffuvh1JmPbk,4948
67
67
  castor_extractor/utils/formatter_test.csv,sha256=UCNqPs8-xrY1AdMSpuctVFXInQe3Z_EABP4rF-Jw5ks,3802
68
68
  castor_extractor/utils/formatter_test.json,sha256=yPP_z1ZEavaUskC-Hx33uGlwKoInHYOFKqsJ9NgwIFo,12527
69
- castor_extractor/utils/formatter_test.py,sha256=sFuiKmU5WYQN4ocg1tEKLN2bMpuuPWVbNqCn0x-jn8I,1752
69
+ castor_extractor/utils/formatter_test.py,sha256=VPlRTPQOaAeCySNs1wU1jd3bMppqxkVpD1dyCLt6p94,1856
70
70
  castor_extractor/utils/json_stream_write.py,sha256=OUdg4-47I47pgbpN9_a6y-lmxuod7aY6PObxjvB-wXI,2082
71
71
  castor_extractor/utils/load.py,sha256=MXwGVB_Dp_VitGwo71sNB_xDmGzQ4oQ13MhaXXyYkS0,265
72
72
  castor_extractor/utils/object.py,sha256=xCcQtoj9313TCcoyRXkLpDcMxmDeQMFMseDNx95oGc0,1959
@@ -158,9 +158,9 @@ castor_extractor/visualization/powerbi/__init__.py,sha256=XSr_fNSsR-EPuGOFo7Ai1r
158
158
  castor_extractor/visualization/powerbi/assets.py,sha256=SASUjxtoOMag3NAlZfhpCy0sLap7WfENEMaEZuBrw6o,801
159
159
  castor_extractor/visualization/powerbi/client/__init__.py,sha256=hU8LE1gV9RttTGJiwVpEa9xDLR4IMkUdshQGthg4zzE,62
160
160
  castor_extractor/visualization/powerbi/client/constants.py,sha256=Cx4pbgyAFc7t_aRQyWj7q-qfkltJl-JgKdMzeKmC9AI,2356
161
- castor_extractor/visualization/powerbi/client/credentials.py,sha256=_YYuHWzL6QbXeGVVmEF8vSZKYnWaQk5Mx-7-13StUMo,500
161
+ castor_extractor/visualization/powerbi/client/credentials.py,sha256=iiYaCa2FM1PBHv4YA0Z1LgdX9gnaQhvHGD0LQb7Tcxw,465
162
162
  castor_extractor/visualization/powerbi/client/credentials_test.py,sha256=23ZlLCvsPB_fmqntnzULkv0mMRE8NCzBXtWS6wupJn4,787
163
- castor_extractor/visualization/powerbi/client/rest.py,sha256=qhmA49tayNUzZsX53VsdqL7cOhWfbryVLwN5bH1zGfA,9500
163
+ castor_extractor/visualization/powerbi/client/rest.py,sha256=0gwqqmmzX76MzNRGfmcNkXw_jxVRAdMTViQExTBQy2Y,9644
164
164
  castor_extractor/visualization/powerbi/client/rest_test.py,sha256=r5rS_1FMwHCDWbYdco11-zvDJ5jYk9l8-VVJcpCtbwM,7343
165
165
  castor_extractor/visualization/powerbi/client/utils.py,sha256=0RcoWcKOdvIGH4f3lYDvufmiMo4tr_ABFlITSrvXjTs,541
166
166
  castor_extractor/visualization/powerbi/client/utils_test.py,sha256=ULHL2JLrcv0xjW2r7QF_ce2OaGeeSzajkMDywJ8ZdVA,719
@@ -265,7 +265,7 @@ castor_extractor/warehouse/databricks/__init__.py,sha256=bTvDxjGQGM2J3hOnVhfNmFP
265
265
  castor_extractor/warehouse/databricks/client.py,sha256=iojSVTARx5JmGy2Tm8D2H5wHO5hqGigVG9Ql2vHNdz8,7375
266
266
  castor_extractor/warehouse/databricks/client_test.py,sha256=rsqHWmVOgvqQ3VmYKJrpWpcGATD_C9FD1sG4CJsin2E,2201
267
267
  castor_extractor/warehouse/databricks/credentials.py,sha256=sMpOAKhBklcmTpcr3mi3o8qLud__8PTZbQUT3K_TRY8,678
268
- castor_extractor/warehouse/databricks/extract.py,sha256=7Tyr20nVlbDex-IjDSdX5VHj4NWvc3sWyfyiKBqPDwI,5612
268
+ castor_extractor/warehouse/databricks/extract.py,sha256=eyt9LihZ9GfHEh8Z2c9PXAHqK6hibPsEIUOKGYfMwg8,5708
269
269
  castor_extractor/warehouse/databricks/format.py,sha256=tCBCApW5iZMBx04p-oCUs36d4JqNqJsBDHe6f-A7eiU,4925
270
270
  castor_extractor/warehouse/databricks/format_test.py,sha256=iPmdJof43fBYL1Sa_fBrCWDQHCHgm7IWCZag1kWkj9E,1970
271
271
  castor_extractor/warehouse/databricks/types.py,sha256=T2SyLy9pY_olLtstdC77moPxIiikVsuQLMxh92YMJQo,78
@@ -346,7 +346,7 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
346
346
  castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
347
347
  castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
348
348
  castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
349
- castor_extractor-0.15.4.dist-info/METADATA,sha256=sLC8V6FB8j-hGW4iuODYF5W_jpgIkXpJBd2WHa5f-0k,6347
350
- castor_extractor-0.15.4.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
351
- castor_extractor-0.15.4.dist-info/entry_points.txt,sha256=EQUCoNjSHevxmY5ZathX_fLZPcuBHng23rj0SSUrLtI,1345
352
- castor_extractor-0.15.4.dist-info/RECORD,,
349
+ castor_extractor-0.16.1.dist-info/METADATA,sha256=tjFndsdmxa0NO7qy3Ddcz1pFZGnDsfPmTQ3x3o-UEeA,6412
350
+ castor_extractor-0.16.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
351
+ castor_extractor-0.16.1.dist-info/entry_points.txt,sha256=EQUCoNjSHevxmY5ZathX_fLZPcuBHng23rj0SSUrLtI,1345
352
+ castor_extractor-0.16.1.dist-info/RECORD,,