castor-extractor 0.25.7__py3-none-any.whl → 0.25.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.25.10 - 2025-10-09
4
+
5
+ * Fix import
6
+
7
+ ## 0.25.9 - 2025-10-09
8
+
9
+ * Snowflake: raise an exception when no database is available for extraction
10
+ * Databricks: raise an exception when no database is available for extraction
11
+ * BigQuery: raise an exception when no project is available for extraction
12
+
13
+ ## 0.25.8 - 2025-10-09
14
+
15
+ * Count: extracting queries and canvas_loads
16
+
3
17
  ## 0.25.7 - 2025-10-07
4
18
 
5
19
  * SqlServer: Ensure database consistency between query and engine
@@ -0,0 +1,5 @@
1
class NoDatabaseProvidedException(Exception):
    """Raised when no database is eligible for extraction.

    The exception takes no arguments, so it can be raised either as
    ``NoDatabaseProvidedException`` or ``NoDatabaseProvidedException()``.
    """

    # Built from adjacent literals rather than a triple-quoted string so the
    # message carries no source-layout whitespace and no trailing newline.
    _MESSAGE = (
        "No database eligible for extraction.\n"
        "If you are using the db_allow/db_block options, "
        "please make sure to use the correct case."
    )

    def __init__(self) -> None:
        super().__init__(self._MESSAGE)
@@ -5,7 +5,9 @@ class CountAsset(ExternalAsset):
5
5
  """Count assets"""
6
6
 
7
7
  CANVASES = "canvases"
8
+ CANVAS_LOADS = "canvas_loads"
8
9
  CANVAS_PERMISSIONS = "canvas_permissions"
9
10
  CELLS = "cells"
10
11
  PROJECTS = "projects"
12
+ QUERIES = "queries"
11
13
  USERS = "users"
@@ -3,6 +3,7 @@ from dataclasses import asdict
3
3
  from typing import Any, Iterator
4
4
 
5
5
  from ....utils import load_file
6
+ from ....warehouse.abstract import TimeFilter
6
7
  from ....warehouse.bigquery import BigQueryClient
7
8
  from ..assets import (
8
9
  CountAsset,
@@ -27,18 +28,21 @@ class CountClient(BigQueryClient):
27
28
  super().__init__(asdict(credentials))
28
29
  self.project_id = credentials.project_id
29
30
  self.dataset_id = credentials.dataset_id
31
+ self.time_filter = TimeFilter.default() # setting current date - 1
30
32
 
31
33
  def _load_query(self, asset: CountAsset) -> str:
32
34
  query = load_file(
33
35
  f"{_QUERIES_FOLDER}/{asset.name.lower()}.sql", __file__
34
36
  )
35
37
  return query.format(
36
- project_id=self.project_id, dataset_id=self.dataset_id
38
+ project_id=self.project_id,
39
+ dataset_id=self.dataset_id,
40
+ extract_date=self.time_filter.day,
37
41
  )
38
42
 
39
43
  def fetch(self, asset: CountAsset) -> Iterator[dict[str, Any]]:
40
44
  """
41
- Fetch the asset given as param, by running a BigQuery query.
45
+ Fetch the asset given as a param by running a BigQuery query.
42
46
  """
43
47
  logger.info(f"Running BigQuery query to fetch: {asset.name}")
44
48
 
@@ -0,0 +1,14 @@
1
-- Number of rows in canvas_loads per canvas_key for a single day.
-- project_id, dataset_id and extract_date are template placeholders that the
-- caller fills in before execution; keep literal curly braces out of this file.
SELECT
    canvas_key,
    COUNT(*) AS view_count
FROM `{project_id}.{dataset_id}.canvas_loads`
WHERE TRUE
    AND DATE(loaded_at) = '{extract_date}'
GROUP BY canvas_key
@@ -0,0 +1,23 @@
1
-- For each cell_key, keep the row with the most recent started_at among the
-- rows whose started_at falls on the extraction day.
-- project_id, dataset_id and extract_date are template placeholders that the
-- caller fills in before execution; keep literal curly braces out of this file.
SELECT
    cell_key,
    connection_key,
    query,
    started_at
FROM `{project_id}.{dataset_id}.queries`
WHERE TRUE
    AND DATE(started_at) = '{extract_date}'
-- QUALIFY filters on the window function directly, replacing the CTE + rank = 1.
QUALIFY ROW_NUMBER() OVER (
    PARTITION BY cell_key
    ORDER BY started_at DESC
) = 1
@@ -48,7 +48,7 @@ class SQLExtractionProcessor:
48
48
  # dict > set > dict
49
49
  return [dict(t) for t in {tuple(d.items()) for d in data}]
50
50
 
51
- def _fetch(self, query: ExtractionQuery) -> Iterator[dict]:
51
+ def fetch(self, query: ExtractionQuery) -> Iterator[dict]:
52
52
  default: Callable[[], Iterator] = lambda: iter(()) # type: ignore
53
53
  decorator = safe_mode(self._safe_mode, default)
54
54
  decorated_execute = decorator(self._client.execute)
@@ -62,7 +62,7 @@ class SQLExtractionProcessor:
62
62
  for i, query in enumerate(queries):
63
63
  logger.info(f"Extracting {asset.value}: query {i + 1}/{total}")
64
64
  # concatenate results of all queries
65
- data = chain(data, self._fetch(query))
65
+ data = chain(data, self.fetch(query))
66
66
 
67
67
  if self._query_builder.needs_deduplication(asset):
68
68
  # cast the list to iterator, but the streaming pipeline is broken in that case
@@ -2,6 +2,9 @@ import json
2
2
  import logging
3
3
  from typing import cast
4
4
 
5
+ from ...exceptions import (
6
+ NoDatabaseProvidedException,
7
+ )
5
8
  from ...logger import add_logging_file_handler
6
9
  from ...utils import LocalStorage, SafeMode, from_env, write_summary
7
10
  from ..abstract import (
@@ -62,6 +65,9 @@ def extract_all(**kwargs) -> None:
62
65
  db_blocked=kwargs.get("db_blocked"),
63
66
  dataset_blocked=kwargs.get("dataset_blocked"),
64
67
  )
68
+ projects = client.get_projects()
69
+ if not projects:
70
+ raise NoDatabaseProvidedException
65
71
 
66
72
  logger.info(f"Available projects: {client.get_projects()}\n")
67
73
 
@@ -2,6 +2,9 @@ import logging
2
2
  from datetime import date
3
3
  from typing import Optional
4
4
 
5
+ from ...exceptions import (
6
+ NoDatabaseProvidedException,
7
+ )
5
8
  from ...utils import AbstractStorage, LocalStorage, write_summary
6
9
  from ..abstract import (
7
10
  ADDITIONAL_LINEAGE_ASSETS,
@@ -177,6 +180,9 @@ def extract_all(**kwargs) -> None:
177
180
  db_blocked=kwargs.get("db_blocked"),
178
181
  )
179
182
 
183
+ if not client.databases():
184
+ raise NoDatabaseProvidedException
185
+
180
186
  storage = LocalStorage(directory=output_directory)
181
187
 
182
188
  extractor = DatabricksExtractionProcessor(
@@ -1,5 +1,8 @@
1
1
  import logging
2
2
 
3
+ from ...exceptions import (
4
+ NoDatabaseProvidedException,
5
+ )
3
6
  from ...utils import LocalStorage, from_env, write_summary
4
7
  from ..abstract import (
5
8
  CATALOG_ASSETS,
@@ -60,6 +63,14 @@ def _credentials(params: dict) -> dict:
60
63
  raise ValueError("missing password or private key")
61
64
 
62
65
 
66
def _get_database_names(
    extractor: SQLExtractionProcessor, query_builder: SnowflakeQueryBuilder
) -> set[str]:
    """Return the distinct database names reported by the warehouse.

    Builds the queries for ``WarehouseAsset.DATABASE``, runs the first one
    through ``extractor.fetch`` and collects the ``database_name`` value of
    every returned row.
    """
    first_query = query_builder.build(WarehouseAsset.DATABASE)[0]
    names: set[str] = set()
    for row in extractor.fetch(first_query):
        names.add(row["database_name"])
    return names
72
+
73
+
63
74
  def extract_all(**kwargs) -> None:
64
75
  """
65
76
  Extract all assets from Snowflake and store the results in CSV files
@@ -85,6 +96,12 @@ def extract_all(**kwargs) -> None:
85
96
  storage=storage,
86
97
  )
87
98
 
99
+ database_names = _get_database_names(extractor, query_builder)
100
+ if not database_names:
101
+ raise NoDatabaseProvidedException
102
+
103
+ logger.info(f"Available databases: {database_names}\n")
104
+
88
105
  for group in extractable_asset_groups(SNOWFLAKE_ASSETS):
89
106
  for asset in group:
90
107
  logger.info(f"Extracting `{asset.value.upper()}` ...")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.25.7
3
+ Version: 0.25.10
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -216,6 +216,20 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
216
216
 
217
217
  # Changelog
218
218
 
219
+ ## 0.25.10 - 2025-10-09
220
+
221
+ * Fix import
222
+
223
+ ## 0.25.9 - 2025-10-09
224
+
225
+ * Snowflake: raise an exception when no database is available for extraction
226
+ * Databricks: raise an exception when no database is available for extraction
227
+ * BigQuery: raise an exception when no project is available for extraction
228
+
229
+ ## 0.25.8 - 2025-10-09
230
+
231
+ * Count: extracting queries and canvas_loads
232
+
219
233
  ## 0.25.7 - 2025-10-07
220
234
 
221
235
  * SqlServer: Ensure database consistency between query and engine
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=59CuAENUDPUzehj5T9VIKCbS3bG-HGUQ-7U_QtegrvA,21528
1
+ CHANGELOG.md,sha256=-GLL2qrB_fkho3FplO7T7Fd6WtPbwEaA-9nfEhXWjtg,21892
2
2
  Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
3
3
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
4
4
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -31,6 +31,7 @@ castor_extractor/commands/extract_tableau.py,sha256=cfH-b0Hq9LGrQSJv02Yr_4d6oNqh
31
31
  castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
32
32
  castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
33
33
  castor_extractor/commands/upload.py,sha256=sqpEF_qqCNvT_niIrM6jPhzLaFVjtYwpc2iZw540F20,1633
34
+ castor_extractor/exceptions.py,sha256=WypgqL8AxYL1viYUCSY4528pk5TRcqVNPvyLMMMDWGw,238
34
35
  castor_extractor/file_checker/__init__.py,sha256=OSt6YLhUT42U_Cp3LCLHMVruwDkksL75Ij13X2UPnVk,119
35
36
  castor_extractor/file_checker/column.py,sha256=6bJhcW1snYwgHKkqlS0Ak7XLHZr4YBwO46JCIlnQNKg,3086
36
37
  castor_extractor/file_checker/column_test.py,sha256=1j8PxvmvmJgpd-mk30iMYOme32ovPSIn4yCXywFoXrg,1935
@@ -162,14 +163,16 @@ castor_extractor/utils/validation_test.py,sha256=A7P6VmI0kYX2aGIeEN12y7LsY7Kpm8p
162
163
  castor_extractor/utils/write.py,sha256=KQVWF29N766avzmSb129IUWrId5c_8BtnYhVLmU6YIs,2133
163
164
  castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
165
  castor_extractor/visualization/count/__init__.py,sha256=lvxGtSe3erjTYK0aPnkOyJibcsC6Q1AFchnK-hZt558,114
165
- castor_extractor/visualization/count/assets.py,sha256=VZCRVDKWSu6l2lVGJS4JKOOmfCUkbS8MnJiLcAY9vqw,232
166
+ castor_extractor/visualization/count/assets.py,sha256=zvj2FuCrZlyKPAd8lx5rgWPKsEQ0biQJCk7AEYj6qd0,290
166
167
  castor_extractor/visualization/count/client/__init__.py,sha256=YawYDutDI0sprp72jN9tKi8bbXCoc0Ij0Ev582tKjqk,74
167
- castor_extractor/visualization/count/client/client.py,sha256=WgljCj8G7D0Brxa0llaeOQ2Ipd7FvtDWFoLWoPyqT9A,1523
168
+ castor_extractor/visualization/count/client/client.py,sha256=vSrV8V3acQAz9Hc2_DKjeU500bx2FT-DHrzUwTdjroo,1706
168
169
  castor_extractor/visualization/count/client/credentials.py,sha256=LZWvcz7p5lrgdgoIQLcxFyv4gqUBW4Jj4qDKN-VW31I,273
170
+ castor_extractor/visualization/count/client/queries/canvas_loads.sql,sha256=suR5_X2SU9UIGwDi7mSxZrJvoh_-4WJwgtfBYnKMV1E,288
169
171
  castor_extractor/visualization/count/client/queries/canvas_permissions.sql,sha256=iFmMfR0zusjxTxmYUS6p0kibZCsnHOQMbAlxaNjx-H4,108
170
172
  castor_extractor/visualization/count/client/queries/canvases.sql,sha256=Ur5HBD9JJH0r14xIj_rwoctnds082_F931vlfcnwi_I,86
171
173
  castor_extractor/visualization/count/client/queries/cells.sql,sha256=Kkk0jyU337PD6RPshSo_ucLl5PS7kIvJZlUnVnmJUkM,111
172
174
  castor_extractor/visualization/count/client/queries/projects.sql,sha256=3Jem3QCVwk4wHiWRJL7cN6Vl2Yc5RZ8yC8ndvPAkaFM,68
175
+ castor_extractor/visualization/count/client/queries/queries.sql,sha256=ffnlRwMedTVoMzuXkQaTWw5oOP-Ties9vGCfQOXdhQ0,456
173
176
  castor_extractor/visualization/count/client/queries/users.sql,sha256=H0n7S7P5cCAWbgPxU32psIc1epXySzsAaQ7MQ9JrkfM,102
174
177
  castor_extractor/visualization/count/extract.py,sha256=ZBsJ9tMxxaq1jG8qJp_OGVK3yPDNkVUsP1_3rcUMtYg,1378
175
178
  castor_extractor/visualization/domo/__init__.py,sha256=1axOCPm4RpdIyUt9LQEvlMvbOPllW8rk63h6EjVgJ0Y,111
@@ -326,7 +329,7 @@ castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
326
329
  castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
327
330
  castor_extractor/warehouse/abstract/asset.py,sha256=wR5mJxAHBcqJ86HRb_Y8x3mDN4uUgSg8jMToLNu0jTM,2740
328
331
  castor_extractor/warehouse/abstract/asset_test.py,sha256=_kd4ybNlWSAdSdEgJKC-jhJTa1nMRa9i8RO3YbqKLM4,758
329
- castor_extractor/warehouse/abstract/extract.py,sha256=9Y2fUn3y2-2WjiHnrabjvAvOA8UETJeTYr18zcM7bdI,2924
332
+ castor_extractor/warehouse/abstract/extract.py,sha256=a3Poqm1hF6NQQNAWQVib9_uj41LcvXzoX8IQCPCT66c,2922
330
333
  castor_extractor/warehouse/abstract/query.py,sha256=h1VvSo6TpyS1myRqmPtoIFY1fVgbthsHOkkajUz-PKA,2444
331
334
  castor_extractor/warehouse/abstract/time_filter.py,sha256=bggIONfMmUxffkA6TwM3BsjfS2l9WFxPq8krfsau5pw,935
332
335
  castor_extractor/warehouse/abstract/time_filter_test.py,sha256=PIkegB7KOKBdpc6zIvmyl_CeQyADeFDplyQ8HTNU5LA,448
@@ -334,7 +337,7 @@ castor_extractor/warehouse/bigquery/__init__.py,sha256=PCGNYdi7dHv-SyanUWzRuBp-y
334
337
  castor_extractor/warehouse/bigquery/client.py,sha256=lPAn6WUwDq0rIPNaMOcabet8C4TYJ93EWZUxX72XrZc,5595
335
338
  castor_extractor/warehouse/bigquery/client_test.py,sha256=Ym8e4d--0YQwiVcNUnXLx0X-X6ZznwNMBMbMaDS5oEA,1514
336
339
  castor_extractor/warehouse/bigquery/credentials.py,sha256=oCZ8H7qpudKzwM7PRMpVAmWXt7bjIRa8Harmp-ysQJ4,425
337
- castor_extractor/warehouse/bigquery/extract.py,sha256=TAxpdwZ6kfSe7xv22vcFwAE3-Rr1VF9UnX0DM_IPdF8,2934
340
+ castor_extractor/warehouse/bigquery/extract.py,sha256=N2LZz6UU0CRRhOw66mrnTmImiIxVn-_sGsCxOCW4wcY,3097
338
341
  castor_extractor/warehouse/bigquery/queries/.sqlfluff,sha256=ce8UDW2k39v6RBVxgKqjOHHYMoGN9S9f7BCZNHHhox8,30
339
342
  castor_extractor/warehouse/bigquery/queries/column.sql,sha256=NxdTnHwomHTEGSc-UoXFKUwg59I9XAOwrSau7JUqGQE,1815
340
343
  castor_extractor/warehouse/bigquery/queries/cte/sharded.sql,sha256=-G7_4lxV7UPe72mYlp4HDGeM_fJjZWuXJ7Q0vxvj5_U,1454
@@ -355,7 +358,7 @@ castor_extractor/warehouse/databricks/client_test.py,sha256=dqEdEAt-6e8CtQ7M2L5v
355
358
  castor_extractor/warehouse/databricks/credentials.py,sha256=ExtVcl2NpMXTx1Lg8vHQdzQtSEm2aqpg3D1BJrNAUjI,528
356
359
  castor_extractor/warehouse/databricks/endpoints.py,sha256=qPoL9CtPFJdwVuW9rJ37nmeMd-nChOBouEVYb4SlaUE,670
357
360
  castor_extractor/warehouse/databricks/enums.py,sha256=3T6BbVvbWvfWkD23krsYT1x0kKh1qRzNPl6WpcXe300,274
358
- castor_extractor/warehouse/databricks/extract.py,sha256=Z4VTEIf0QMiua0QGAlJdQ86kxmGAXekQ304aCKme6IY,7358
361
+ castor_extractor/warehouse/databricks/extract.py,sha256=mv780vnwJQ9vHhBRBR5SNcc7Z8PbGCLOrq-XSV0uziA,7495
359
362
  castor_extractor/warehouse/databricks/format.py,sha256=S3BOcwJubc1pyKr-li26uftUUfsjfrm5Qf4LqmElXVk,6736
360
363
  castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0zdca7Pms4seCHimbw8NAoAI,3322
361
364
  castor_extractor/warehouse/databricks/pagination.py,sha256=sM1G0sN1pf1TPpI0Y3Oew378UGEKVkMRc2Mlu9tDjLo,545
@@ -418,7 +421,7 @@ castor_extractor/warehouse/snowflake/client.py,sha256=RB72bbl_k91wDU76yrggPK6oeE
418
421
  castor_extractor/warehouse/snowflake/client_test.py,sha256=ihWtOOAQfh8pu5JTr_EWfqefKOVIaJXznACURzaU1Qs,1432
419
422
  castor_extractor/warehouse/snowflake/credentials.py,sha256=u0sZ6xPtcZmmvnUsAejJk-YxGl8BTzX_BlRjRk92BYU,932
420
423
  castor_extractor/warehouse/snowflake/credentials_test.py,sha256=Lkc-DHXOvr50KrqAW4nt_x0IA0Mu_CsBVu6ATnzQB6I,673
421
- castor_extractor/warehouse/snowflake/extract.py,sha256=eGtIqW5kKJl-e36viqYQzkXn39CJkmMBR4oSt-B0ud4,3082
424
+ castor_extractor/warehouse/snowflake/extract.py,sha256=N-lZHOTW0bQyEeKA9EuhPB9YkAd1UjyHKkEsl8hat6g,3625
422
425
  castor_extractor/warehouse/snowflake/queries/.sqlfluff,sha256=vttrwcr64JVIuvc7WIg9C54cbOkjg_VjXNR7YnTGOPE,31
423
426
  castor_extractor/warehouse/snowflake/queries/column.sql,sha256=Ru-yC0s76I9LehOA4aCZ--xz6D9H1Hyr3OZdILOBHAw,1882
424
427
  castor_extractor/warehouse/snowflake/queries/column_lineage.sql,sha256=YKBiZ6zySSNcXLDXwm31EjGIIkkkZc0-S6hI1SRM80o,1179
@@ -443,8 +446,8 @@ castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=MAlnTis43E3Amu1e1Oz
443
446
  castor_extractor/warehouse/sqlserver/queries/view_ddl.sql,sha256=9rynvx6MWg3iZzrWPB7haZfVKEPkxulzryE2g19x804,315
444
447
  castor_extractor/warehouse/sqlserver/query.py,sha256=gr5lnZSUm-wSYuVnJlg6fc7jXWirbL-sCiQN9RnAiPQ,1789
445
448
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
446
- castor_extractor-0.25.7.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
447
- castor_extractor-0.25.7.dist-info/METADATA,sha256=omciSlMhL8CWEtzosDWL1hj0SP2C8DiI4z0CeMb-nTs,29030
448
- castor_extractor-0.25.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
449
- castor_extractor-0.25.7.dist-info/entry_points.txt,sha256=qyTrKNByoq2HYi1xbA79OU7qxg-OWPvle8VwDqt-KnE,1869
450
- castor_extractor-0.25.7.dist-info/RECORD,,
449
+ castor_extractor-0.25.10.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
450
+ castor_extractor-0.25.10.dist-info/METADATA,sha256=G36bUjs5FkXG0aihDl5OmmTf_bFN01Js5Xlbp9T7crA,29395
451
+ castor_extractor-0.25.10.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
452
+ castor_extractor-0.25.10.dist-info/entry_points.txt,sha256=qyTrKNByoq2HYi1xbA79OU7qxg-OWPvle8VwDqt-KnE,1869
453
+ castor_extractor-0.25.10.dist-info/RECORD,,