castor-extractor 0.24.33__py3-none-any.whl → 0.24.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of castor-extractor might be problematic.

Files changed (24)
  1. CHANGELOG.md +9 -1
  2. castor_extractor/commands/extract_sqlserver.py +12 -0
  3. castor_extractor/transformation/coalesce/client/client.py +92 -88
  4. castor_extractor/transformation/coalesce/client/pagination.py +26 -0
  5. castor_extractor/utils/__init__.py +7 -1
  6. castor_extractor/utils/client/api/pagination.py +5 -2
  7. castor_extractor/utils/collection.py +26 -0
  8. castor_extractor/utils/collection_test.py +31 -1
  9. castor_extractor/visualization/looker_studio/client/queries/query.sql +1 -0
  10. castor_extractor/warehouse/sqlserver/client.py +15 -0
  11. castor_extractor/warehouse/sqlserver/extract.py +8 -2
  12. castor_extractor/warehouse/sqlserver/queries/column.sql +10 -10
  13. castor_extractor/warehouse/sqlserver/queries/database.sql +1 -1
  14. castor_extractor/warehouse/sqlserver/queries/schema.sql +5 -6
  15. castor_extractor/warehouse/sqlserver/queries/table.sql +12 -14
  16. castor_extractor/warehouse/sqlserver/query.py +30 -1
  17. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/METADATA +10 -2
  18. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/RECORD +21 -23
  19. castor_extractor/transformation/coalesce/client/type.py +0 -1
  20. castor_extractor/transformation/coalesce/client/utils.py +0 -52
  21. castor_extractor/transformation/coalesce/client/utils_test.py +0 -54
  22. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/LICENCE +0 -0
  23. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/WHEEL +0 -0
  24. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
 # Changelog
 
+## 0.24.35 - 2025-07-29
+
+* Coalesce - Fix pagination issue
+
+## 0.24.34 - 2025-07-02
+
+* SQLServer: multiple databases
+
 ## 0.24.33 - 2025-07-10
 
 * Tableau - Add an option to skip fields ingestion
@@ -10,7 +18,7 @@
 
 ## 0.24.31 - 2025-07-02
 
-* Looker Studio: add option to list users via a provided JSON file
+* Looker Studio: add an option to list users via a provided JSON file
 
 ## 0.24.30 - 2025-06-26
 
castor_extractor/commands/extract_sqlserver.py CHANGED
@@ -23,6 +23,16 @@ def main():
         action="store_true",
         help="Skips files already extracted instead of replacing them",
     )
+    parser.add_argument(
+        "--db-allowed",
+        nargs="*",
+        help="List of databases that should be extracted",
+    )
+    parser.add_argument(
+        "--db-blocked",
+        nargs="*",
+        help="List of databases that should not be extracted",
+    )
     parser.set_defaults(skip_existing=False)
 
     args = parser.parse_args()
@@ -35,4 +45,6 @@ def main():
         password=args.password,
         output_directory=args.output,
         skip_existing=args.skip_existing,
        db_allowed=args.db_allowed,
        db_blocked=args.db_blocked,
     )
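
Since both new flags use nargs="*", each accepts zero or more database names and defaults to None when the flag is omitted. A minimal, standalone sketch of the resulting parsing behaviour (a throwaway parser mirroring the two flags above, not the package's actual entry point):

    import argparse

    # Hypothetical parser, for illustration only.
    parser = argparse.ArgumentParser()
    parser.add_argument("--db-allowed", nargs="*")
    parser.add_argument("--db-blocked", nargs="*")

    args = parser.parse_args(["--db-blocked", "staging", "test"])
    print(args.db_allowed)  # None (flag omitted)
    print(args.db_blocked)  # ['staging', 'test']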
castor_extractor/transformation/coalesce/client/client.py CHANGED
@@ -1,31 +1,47 @@
 import logging
+from functools import partial
 from http import HTTPStatus
-from typing import Iterator, Optional
+from typing import Callable, Optional
 
-from requests import ConnectionError
+from pydantic import ValidationError
 
 from ....utils import (
     APIClient,
     BearerAuth,
     RequestSafeMode,
     SerializedAsset,
+    fetch_all_pages,
 )
 from ..assets import CoalesceAsset, CoalesceQualityAsset
 from .credentials import CoalesceCredentials
 from .endpoint import (
     CoalesceEndpointFactory,
 )
-from .type import NodeIDToNamesMapping
-from .utils import column_names_per_node, is_test, test_names_per_node
+from .pagination import CoalescePagination
+
+logger = logging.getLogger(__name__)
+
+
+COALESCE_PAGE_SIZE = 300
+COALESCE_PAGE_SIZE_RUN_RESULTS = 1_000
+
+COALESCE_TIMEOUT_SECONDS = 90
 
-_LIMIT_MAX = 1_000
 _MAX_ERRORS = 200
 
-logger = logging.getLogger(__name__)
+COALESCE_SAFE_MODE = RequestSafeMode(
+    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
+    max_errors=_MAX_ERRORS,
+)
 
 
-def _run_result_payload(result: dict, query_result: dict) -> dict:
+def _run_result_payload(
+    environment_id: str,
+    result: dict,
+    query_result: dict,
+) -> dict:
     return {
+        "environment_id": environment_id,
         "node_id": result["nodeID"],
         "node_name": result["name"],
         "test_name": query_result["name"],
@@ -37,13 +53,6 @@ def _run_result_payload(result: dict, query_result: dict) -> dict:
     }
 
 
-COALESCE_SAFE_MODE = RequestSafeMode(
-    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
-    max_errors=_MAX_ERRORS,
-)
-COALESCE_TIMEOUT_SECONDS = 90
-
-
 class CoalesceBearerAuth(BearerAuth):
     """Bearer Authentication for Coalesce"""
 
@@ -69,65 +78,74 @@ class CoalesceClient(APIClient):
             timeout=COALESCE_TIMEOUT_SECONDS,
         )
 
-    def _fetch_environments(self) -> Iterator[dict]:
-        endpoint = CoalesceEndpointFactory.environments()
-        result = self._get(endpoint=endpoint)
-        return result["data"]
-
-    def _node_details(self, environment_id: int, node_id: str) -> dict:
-        endpoint = CoalesceEndpointFactory.nodes(
-            environment_id=environment_id, node_id=node_id
+    def _get_paginated(
+        self,
+        endpoint: str,
+        limit: int = COALESCE_PAGE_SIZE,
+        params: Optional[dict] = None,
+    ) -> Callable:
+        return partial(
+            self._get,
+            retry_on_timeout=False,  # explained in the docstring
+            endpoint=endpoint,
+            params={
+                "limit": limit,
+                **(params or dict()),
+            },
         )
-        return self._get(endpoint=endpoint)
+
+    def _fetch_environments(self) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.environments()
+        request = self._get_paginated(endpoint=endpoint)
+        result = fetch_all_pages(request, CoalescePagination)
+        return list(result)
 
     def _fetch_env_nodes(self, environment_id: int) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.nodes(environment_id=environment_id)
-        result = self._get(endpoint=endpoint)
-        nodes: list[dict] = []
-        for node in result["data"]:
-            try:
-                details = self._node_details(environment_id, node["id"])
-                nodes.append({**node, **details})
-            except ConnectionError as e:
-                node_id = node["id"]
-                message = f"ConnectionError, environment: {environment_id}, node: {node_id}"
-                logger.warning(message)
-                raise e
-        return nodes
+        request = self._get_paginated(
+            endpoint=endpoint,
+            params={"detail": "true"},
+        )
+        result = fetch_all_pages(request, CoalescePagination)
+        return [
+            {
+                **node,
+                "environment_id": environment_id,
+            }
+            for node in result
+        ]
 
     def _fetch_all_nodes(self) -> SerializedAsset:
+        environments = self._fetch_environments()
+        total = len(environments)
         nodes: list[dict] = []
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            nodes.extend(self._fetch_env_nodes(environment_id))
+
+        for index, env in enumerate(environments):
+            env_id = env["id"]
+            logger.info(f"Fetching nodes for env #{env_id} - {index}/{total}")
+            try:
+                nodes.extend(self._fetch_env_nodes(env_id))
+            except ValidationError as e:
+                # 500 Server Error: Internal Server Error on Coalesce API
+                logger.warning(
+                    f"Skipping nodes for {env_id} due to the following Error: {e}"
+                )
+            logger.info(f"{len(nodes)} nodes extracted so far")
         return nodes
 
     def _fetch_runs(self, starting_from: str) -> SerializedAsset:
-        """
-        fetch runs, per environment;
-        we break per environment to lower the chance of exceeding the 1k limit
-        """
-        runs: list[dict] = []
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            runs.extend(
-                self._fetch_recent_runs_per_env(environment_id, starting_from)
-            )
-        return runs
-
-    def _fetch_recent_runs_per_env(
-        self, environment_id: int, starting_from: str
-    ) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.runs()
         params = {
-            "environmentID": environment_id,
-            "limit": _LIMIT_MAX,
             "orderBy": "runEndTime",
             "orderByDirection": "asc",
             "startingFrom": starting_from,
         }
-        result = self._get(endpoint=endpoint, params=params)
-        return result["data"]
+        request = self._get_paginated(
+            endpoint=endpoint,
+            params=params,
+            limit=COALESCE_PAGE_SIZE_RUN_RESULTS,
+        )
+        return list(fetch_all_pages(request, CoalescePagination))
 
     def _fetch_run_results(self, run_id: str) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.run_results(run_id)
@@ -136,51 +154,37 @@ class CoalesceClient(APIClient):
 
     def _run_results_by_run(
         self,
+        environment_id: str,
         run_id: str,
-        test_names: NodeIDToNamesMapping,
-        column_names: NodeIDToNamesMapping,
     ) -> SerializedAsset:
         run_results: list[dict] = []
         for result in self._fetch_run_results(run_id):
-            node_id = result["nodeID"]
             for query_result in result["queryResults"]:
-                _is_test = is_test(
+                if query_result["type"] != "sqlTest":
+                    continue
+                run_result = _run_result_payload(
+                    environment_id,
+                    result,
                     query_result,
-                    node_id,
-                    test_names,
-                    column_names,
                 )
-                if not _is_test:
-                    continue
-                run_result = _run_result_payload(result, query_result)
                 run_results.append(run_result)
         return run_results
 
-    def _run_results_by_env(
-        self, environment_id: int, starting_from: str
+    def _fetch_all_run_results(
+        self,
+        starting_from: str,
    ) -> SerializedAsset:
         run_results: list[dict] = []
-        nodes = self._fetch_env_nodes(environment_id)
-        test_names = test_names_per_node(nodes)
-        column_names = column_names_per_node(nodes)
-        runs = self._fetch_recent_runs_per_env(environment_id, starting_from)
 
-        for run in runs:
-            run_id = run["id"]
-            _results = self._run_results_by_run(
-                run_id, test_names, column_names
-            )
-            run_results.extend(_results)
-        return run_results
-
-    def _fetch_all_run_results(self, starting_from: str) -> SerializedAsset:
-        run_results: list[dict] = []
-
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            _results = self._run_results_by_env(environment_id, starting_from)
-            run_results.extend(_results)
+        runs = self._fetch_runs(starting_from)
+        total = len(runs)
 
+        for index, run in enumerate(runs):
+            logger.info(f"Extracting run results ({index}/{total})")
+            run_id = run["id"]
+            environment_id = run["environmentID"]
+            current_results = self._run_results_by_run(environment_id, run_id)
+            run_results.extend(current_results)
         return run_results
 
     def fetch(
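
Every listing endpoint now goes through _get_paginated, which uses functools.partial to freeze the endpoint and page size into a zero-argument callable; fetch_all_pages then re-invokes that callable with the cursor parameters supplied by CoalescePagination. A self-contained sketch of the pattern, with a fake get standing in for APIClient._get (names and page contents invented):

    from functools import partial

    # Fake transport: serves the integers 0..9 in pages, Coalesce-style
    # ({"data": [...], "next": cursor-or-None}).
    def get(endpoint: str, params: dict, startingFrom: int = 0) -> dict:
        data = list(range(startingFrom, min(startingFrom + params["limit"], 10)))
        has_more = bool(data) and data[-1] < 9
        return {"data": data, "next": data[-1] + 1 if has_more else None}

    request = partial(get, endpoint="/environments", params={"limit": 4})

    page = request()  # first page: no cursor
    results = list(page["data"])
    while page["next"] is not None:
        page = request(startingFrom=page["next"])  # follow the cursor
        results.extend(page["data"])
    # results == [0, 1, ..., 9]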
castor_extractor/transformation/coalesce/client/pagination.py ADDED
@@ -0,0 +1,26 @@
+from typing import Optional, Union
+
+from ....utils import PaginationModel
+
+
+class CoalescePagination(PaginationModel):
+    """
+    Class to handle paginated results for Coalesce
+    See their documentation here
+    https://docs.coalesce.io/docs/api
+    """
+
+    data: list
+    next: Union[Optional[str], Optional[int]] = None
+
+    def is_last(self) -> bool:
+        """Stopping condition for the pagination"""
+        return self.next is None
+
+    def next_page_payload(self):
+        """Payload enabling to generate the request for the next page"""
+        return {"startingFrom": self.next}
+
+    def page_results(self) -> list:
+        """List of results of the current page"""
+        return self.data
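
For intuition, the three hooks map directly onto a cursor loop: page_results yields the current page, is_last stops once the API returns no cursor, and next_page_payload feeds the cursor back as the startingFrom request parameter. A toy walk-through with invented field values (assuming PaginationModel behaves like a pydantic model, as the ValidationError handling in client.py suggests):

    page = CoalescePagination(data=[{"id": 1}, {"id": 2}], next=25)
    page.page_results()       # [{'id': 1}, {'id': 2}]
    page.is_last()            # False
    page.next_page_payload()  # {'startingFrom': 25} -> params for the next request

    last = CoalescePagination(data=[{"id": 3}], next=None)
    last.is_last()            # True -> pagination stops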
castor_extractor/utils/__init__.py CHANGED
@@ -19,7 +19,13 @@ from .client import (
     handle_response,
     uri_encode,
 )
-from .collection import deduplicate, empty_iterator, group_by, mapping_from_rows
+from .collection import (
+    deduplicate,
+    empty_iterator,
+    filter_items,
+    group_by,
+    mapping_from_rows,
+)
 from .constants import OUTPUT_DIR
 from .deprecate import deprecate_python
 from .env import from_env
castor_extractor/utils/client/api/pagination.py CHANGED
@@ -66,16 +66,19 @@ def fetch_all_pages(
     """
     page_number = 1
     response_payload = request()
+
     paginated_response = pagination_model(**response_payload)
+
     while not paginated_response.is_last():
         logger.debug(f"Fetching page number {page_number}")
         yield from paginated_response.page_results()
         next_page_parameters = paginated_response.next_page_parameters()
-        new_request = partial(request, **next_page_parameters)
+        request_with_pagination = partial(request, **next_page_parameters)
         if rate_limit:
             sleep(rate_limit)
         paginated_response = pagination_model(
-            current_page_payload=next_page_parameters, **new_request()
+            current_page_payload=next_page_parameters,
+            **request_with_pagination(),
         )
         page_number += 1
 
castor_extractor/utils/collection.py CHANGED
@@ -2,6 +2,8 @@ from collections import defaultdict
 from collections.abc import Iterable, Sequence
 from typing import (
     Any,
+    List,
+    Optional,
     TypeVar,
 )
 
@@ -80,3 +82,27 @@ def deduplicate(
         deduplicated.append(element)
 
     return deduplicated
+
+
+def filter_items(
+    items: Iterable[T],
+    allowed: Optional[Iterable[T]] = None,
+    blocked: Optional[Iterable[T]] = None,
+) -> List[T]:
+    """
+    Filters `items` by excluding any in `blocked` or including only those in `allowed`.
+    If both `allowed` and `blocked` are None, returns all items.
+    If both are provided, raise an error.
+    """
+    items = list(items)
+
+    if allowed and blocked:
+        raise AttributeError(
+            "Only one of `allowed` and `blocked` can be provided"
+        )
+    if blocked:
+        return [item for item in items if item not in blocked]
+    if allowed:
+        return [item for item in items if item in allowed]
+
+    return items
castor_extractor/utils/collection_test.py CHANGED
@@ -1,4 +1,6 @@
-from .collection import deduplicate, mapping_from_rows
+import pytest
+
+from .collection import deduplicate, filter_items, mapping_from_rows
 
 
 def test__mapping_from_rows__basic_mapping():
@@ -72,3 +74,31 @@ def test_deduplicate():
         {"id": "2", "name": "duplicate"},
     ]
     assert deduplicate("id", elements) == [e1, e2, e3]
+
+
+def test_sqlserver_databases():
+    databases = [
+        "prod",
+        "staging",
+        "test",
+    ]
+
+    # 1. No allowed or blocked: should return all
+    result1 = filter_items(databases)
+    assert result1 == [
+        "prod",
+        "staging",
+        "test",
+    ]
+
+    # 2. Block "prod": only staging and test should remain
+    result2 = filter_items(databases, blocked=["prod"])
+    assert result2 == ["staging", "test"]
+
+    # 3. Only allow "staging" and "test"
+    result3 = filter_items(databases, allowed=["staging", "test"])
+    assert result3 == ["staging", "test"]
+
+    # 4. allowed and blocked, should raise
+    with pytest.raises(AttributeError):
+        filter_items(databases, blocked=["prod"], allowed=["staging", "test"])
castor_extractor/visualization/looker_studio/client/queries/query.sql CHANGED
@@ -29,6 +29,7 @@ WITH ranked_by_datasource AS (
         `{project}.region-{region}.INFORMATION_SCHEMA.JOBS_BY_PROJECT`
     WHERE
         job_type = 'QUERY'
+        AND ARRAY_LENGTH(referenced_tables) > 0
        AND EXISTS (
            SELECT
                1
castor_extractor/warehouse/sqlserver/client.py CHANGED
@@ -1,15 +1,20 @@
+import logging
 from collections.abc import Iterator
 
 from sqlalchemy import text
 
 from ...utils import ExtractionQuery, SqlalchemyClient, uri_encode
 
+logger = logging.getLogger(__name__)
+
 SERVER_URI = "{user}:{password}@{host}:{port}/{database}"
 MSSQL_URI = f"mssql+pymssql://{SERVER_URI}"
 DEFAULT_PORT = 1433
 
 _KEYS = ("user", "password", "host", "port", "database")
 
+_SYSTEM_DATABASES = ("master", "model", "msdb", "tempdb", "DBAdmin")
+
 
 def _check_key(credentials: dict) -> None:
     for key in _KEYS:
@@ -51,3 +56,13 @@ class MSSQLClient(SqlalchemyClient):
                 yield from results
             finally:
                 self.close()
+
+    def get_databases(self) -> list[str]:
+        result = self.execute(
+            ExtractionQuery("SELECT name FROM sys.databases", {})
+        )
+        return [
+            row["name"]
+            for row in result
+            if row["name"] not in _SYSTEM_DATABASES
+        ]
castor_extractor/warehouse/sqlserver/extract.py CHANGED
@@ -1,6 +1,6 @@
 import logging
 
-from ...utils import LocalStorage, from_env, write_summary
+from ...utils import LocalStorage, filter_items, from_env, write_summary
 from ..abstract import (
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
@@ -51,7 +51,13 @@ def extract_all(**kwargs) -> None:
 
     client = MSSQLClient(credentials=_credentials(kwargs))
 
-    query_builder = MSSQLQueryBuilder()
+    databases = filter_items(
+        client.get_databases(), kwargs.get("allowed"), kwargs.get("blocked")
+    )
+
+    query_builder = MSSQLQueryBuilder(
+        databases=databases,
+    )
 
     storage = LocalStorage(directory=output_directory)
 
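Putting the two new pieces together: get_databases returns every database on the server except the system ones, and filter_items then applies the user's allow- or block-list before the query builder fans out per database. A toy run with invented database names (assumes from castor_extractor.utils import filter_items, which the __init__ change above exports):

    # Invented server inventory; `system` mirrors _SYSTEM_DATABASES above.
    server_databases = ["master", "tempdb", "sales", "hr", "analytics"]
    system = ("master", "model", "msdb", "tempdb", "DBAdmin")

    user_databases = [name for name in server_databases if name not in system]
    # ['sales', 'hr', 'analytics'] -- what get_databases would yield here

    filter_items(user_databases, blocked=["hr"])         # ['sales', 'analytics']
    filter_items(user_databases, allowed=["analytics"])  # ['analytics']
    filter_items(user_databases)                         # all three, unchanged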
castor_extractor/warehouse/sqlserver/queries/column.sql CHANGED
@@ -11,7 +11,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.tables
+        {database}.sys.tables
 
     UNION
 
@@ -21,7 +21,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.views
+        {database}.sys.views
 
     UNION
 
@@ -31,7 +31,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.external_tables
 ),
 /*
 `sys.columns` contains, among others:
@@ -54,11 +54,11 @@ column_ids AS (
         schema_name = ss.name,
         schema_id = ss.schema_id,
         comment = CONVERT(varchar(1024), ep.value)
-    FROM sys.columns AS sc
+    FROM {database}.sys.columns AS sc
     LEFT JOIN extended_tables AS et ON sc.object_id = et.table_id
-    LEFT JOIN sys.schemas AS ss ON et.schema_id = ss.schema_id
-    LEFT JOIN sys.databases AS sd ON sd.name = DB_NAME()
-    LEFT JOIN sys.extended_properties AS ep
+    LEFT JOIN {database}.sys.schemas AS ss ON et.schema_id = ss.schema_id
+    LEFT JOIN {database}.sys.databases AS sd ON sd.name = '{database}'
+    LEFT JOIN {database}.sys.extended_properties AS ep
         ON
             sc.object_id = ep.major_id
             AND sc.column_id = ep.minor_id
@@ -70,9 +70,9 @@ columns AS (
         i.database_name,
         i.database_id,
         schema_name = c.table_schema,
-        i.schema_id,
+        schema_id = CAST(i.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)),
         table_name = c.table_name,
-        i.table_id,
+        table_id = CAST(i.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)) + '_' + CAST(i.table_id AS VARCHAR(10)),
         c.column_name,
         c.data_type,
         c.ordinal_position,
@@ -87,7 +87,7 @@ columns AS (
         i.comment,
         column_id = CONCAT(i.table_id, '.', c.column_name)
     FROM
-        information_schema.columns AS c
+        {database}.information_schema.columns AS c
     LEFT JOIN column_ids AS i
         ON
             (
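
A note on the recurring CAST(...) + '_' + CAST(...) expressions here and in schema.sql/table.sql below: in SQL Server, schema_id and object_id are only unique within one database, so once several databases are extracted the raw IDs could collide; prefixing them with database_id keeps the composite identifiers unique. In Python terms, roughly:

    # Rough equivalent of the composite-ID construction in the SQL above.
    def composite_schema_id(database_id: int, schema_id: int) -> str:
        return f"{database_id}_{schema_id}"

    composite_schema_id(5, 1)  # '5_1'
    composite_schema_id(7, 1)  # '7_1' -- no collision across databases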
castor_extractor/warehouse/sqlserver/queries/database.sql CHANGED
@@ -2,4 +2,4 @@ SELECT
     db.database_id,
     database_name = db.name
 FROM sys.databases AS db
-WHERE db.name = DB_NAME()
+WHERE db.name NOT IN ('master', 'model', 'msdb', 'tempdb', 'DBAdmin');
castor_extractor/warehouse/sqlserver/queries/schema.sql CHANGED
@@ -3,21 +3,20 @@ WITH ids AS (
     SELECT DISTINCT
         table_catalog,
         table_schema
-    FROM information_schema.tables
-    WHERE table_catalog = DB_NAME()
+    FROM {database}.information_schema.tables
 )
 
 SELECT
     d.database_id,
     database_name = i.table_catalog,
     schema_name = s.name,
-    s.schema_id,
+    schema_id = CAST(d.database_id AS VARCHAR(10)) + '_' + CAST(s.schema_id AS VARCHAR(10)),
     schema_owner = u.name,
     schema_owner_id = u.uid
-FROM sys.schemas AS s
+FROM {database}.sys.schemas AS s
 INNER JOIN ids AS i
     ON s.name = i.table_schema
-LEFT JOIN sys.sysusers AS u
+LEFT JOIN {database}.sys.sysusers AS u
     ON s.principal_id = u.uid
-LEFT JOIN sys.databases AS d
+LEFT JOIN {database}.sys.databases AS d
     ON i.table_catalog = d.name
castor_extractor/warehouse/sqlserver/queries/table.sql CHANGED
@@ -11,7 +11,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.tables
+        {database}.sys.tables
 
     UNION
 
@@ -21,7 +21,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.views
+        {database}.sys.views
 
     UNION
 
@@ -31,14 +31,14 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.external_tables
+        {database}.sys.external_tables
 ),
 -- Get the row count per table
 partitions AS (
     SELECT
         object_id,
         row_count = SUM(rows)
-    FROM sys.partitions
+    FROM {database}.sys.partitions
     GROUP BY object_id
 ),
 -- Append row count to table properties
@@ -69,13 +69,12 @@ table_ids AS (
         table_owner = u.name,
         row_count,
         comment = CONVERT(varchar(1024), ep.value)
-    FROM
-        extended_tables_with_row_count AS et
-    LEFT JOIN sys.schemas AS ss
+    FROM extended_tables_with_row_count AS et
+    LEFT JOIN {database}.sys.schemas AS ss
         ON et.schema_id = ss.schema_id
-    LEFT JOIN sys.sysusers AS u
+    LEFT JOIN {database}.sys.sysusers AS u
         ON et.table_owner_id = u.uid
-    LEFT JOIN sys.extended_properties AS ep
+    LEFT JOIN {database}.sys.extended_properties AS ep
         ON (
             et.table_id = ep.major_id
             AND ep.minor_id = 0
@@ -91,19 +90,18 @@ meta AS (
         t.table_name,
         t.table_type
     FROM
-        information_schema.tables AS t
-    LEFT JOIN sys.databases AS db
+        {database}.information_schema.tables AS t
+    LEFT JOIN {database}.sys.databases AS db
         ON t.table_catalog = db.name
-    WHERE t.table_catalog = db_name()
 )
 
 SELECT
     m.database_name,
     m.database_id,
     m.schema_name,
-    i.schema_id,
+    schema_id = CAST(m.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)),
     m.table_name,
-    i.table_id,
+    table_id = CAST(m.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)) + '_' + CAST(i.table_id AS VARCHAR(10)),
     m.table_type,
     i.table_owner,
     i.table_owner_id,
castor_extractor/warehouse/sqlserver/query.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 from typing import Optional
 
 from ..abstract import (
@@ -7,6 +8,15 @@ from ..abstract import (
     WarehouseAsset,
 )
 
+logger = logging.getLogger(__name__)
+
+
+_DATABASE_REQUIRED = (
+    WarehouseAsset.SCHEMA,
+    WarehouseAsset.TABLE,
+    WarehouseAsset.COLUMN,
+)
+
 
 class MSSQLQueryBuilder(AbstractQueryBuilder):
     """
@@ -15,10 +25,29 @@ class MSSQLQueryBuilder(AbstractQueryBuilder):
 
     def __init__(
         self,
+        databases: list[str],
         time_filter: Optional[TimeFilter] = None,
     ):
         super().__init__(time_filter=time_filter)
+        self._databases = databases
+
+    @staticmethod
+    def _format(query: ExtractionQuery, values: dict) -> ExtractionQuery:
+        return ExtractionQuery(
+            statement=query.statement.format(**values),
+            params=query.params,
+        )
 
     def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         query = self.build_default(asset)
-        return [query]
+
+        if asset not in _DATABASE_REQUIRED:
+            return [query]
+
+        logger.info(
+            f"\tWill run queries with following database params: {self._databases}",
+        )
+        return [
+            self._format(query, {"database": database})
+            for database in self._databases
+        ]
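
build now returns one ExtractionQuery per database for schema, table, and column assets, with _format substituting the {database} placeholder that the SQL files above rely on via str.format. A minimal illustration of that fan-out (toy statement and names, not the shipped SQL):

    statement = "SELECT * FROM {database}.sys.tables"
    databases = ["sales", "hr"]

    queries = [statement.format(database=db) for db in databases]
    # ['SELECT * FROM sales.sys.tables', 'SELECT * FROM hr.sys.tables']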
{castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.33
+Version: 0.24.35
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -215,6 +215,14 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
 
 # Changelog
 
+## 0.24.35 - 2025-07-29
+
+* Coalesce - Fix pagination issue
+
+## 0.24.34 - 2025-07-02
+
+* SQLServer: multiple databases
+
 ## 0.24.33 - 2025-07-10
 
 * Tableau - Add an option to skip fields ingestion
@@ -225,7 +233,7 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
 
 ## 0.24.31 - 2025-07-02
 
-* Looker Studio: add option to list users via a provided JSON file
+* Looker Studio: add an option to list users via a provided JSON file
 
 ## 0.24.30 - 2025-06-26
 
{castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=jKQMJGiDeDEZG-753wDrtfOoOYa5Db5Liy0AsATdsuc,18779
+CHANGELOG.md,sha256=1S9O_c1LH8T4P78akRxlFS8Tv0i9Jgswy7V9zvd_UQw,18900
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -24,7 +24,7 @@ castor_extractor/commands/extract_salesforce.py,sha256=3j3YTmMkPAwocR-B1ozJQai0U
 castor_extractor/commands/extract_salesforce_reporting.py,sha256=FdANTNiLkIPdm80XMYxWReHjdycLsIa61pyeCD-sUDk,962
 castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_9bYjUMg7BOW-k,643
 castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
-castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
+castor_extractor/commands/extract_sqlserver.py,sha256=-20AlQbJ4W3oQytHLKdN8GX__UkrrQukOgSzy2l1WZY,1483
 castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t774-XOpjdCFF1dYo,821
 castor_extractor/commands/extract_tableau.py,sha256=LNtI29LbVk1vp4RNrn89GmdW6R_7QBYunRmkowDhbco,1982
 castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
@@ -76,12 +76,10 @@ castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
 castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
 castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
-castor_extractor/transformation/coalesce/client/client.py,sha256=7EVJDDxnIm5_uMHLFZ2PD6JzfebVglKST9IiURwn4vs,6524
+castor_extractor/transformation/coalesce/client/client.py,sha256=3YB82ibaumeSRd510mlrPXKsWefV3lHQQVis9oEK-LQ,6133
 castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
 castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
-castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
-castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
-castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
+castor_extractor/transformation/coalesce/client/pagination.py,sha256=zynyWCMEzUQ7HA1Q5AP4BAOmxRQI6NA5jCPEo0lHn44,705
 castor_extractor/transformation/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
 castor_extractor/transformation/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
 castor_extractor/transformation/dbt/client.py,sha256=BIue1DNAn2b7kHeiXBkGNosq8jZA2DrgjP7Gi5epAPE,5684
@@ -96,7 +94,7 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
 castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
 castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
 castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
-castor_extractor/utils/__init__.py,sha256=_hC54hBfPH41TTuWMsqQcyYVF7SojrOevW3OAv8M05E,1652
+castor_extractor/utils/__init__.py,sha256=z_BdKTUyuug3I5AzCuSGrAVskfLax4_olfORIjhZw_M,1691
 castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
 castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
 castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
@@ -108,7 +106,7 @@ castor_extractor/utils/client/api/auth.py,sha256=lq0K3UEl1vwIIa_vKTdlpIQPdE5K1-5
 castor_extractor/utils/client/api/auth_test.py,sha256=LlyXytnatg6ZzR4Zkvzk0BH99FYhHX7qn_nyr2MSnDI,1305
 castor_extractor/utils/client/api/client.py,sha256=qmj7KoNqt6F-cmpdaMiz_aVxzwMCgbDNcgzXSbCdu1Y,5183
 castor_extractor/utils/client/api/client_test.py,sha256=FM3ZxsLLfMOBn44cXX6FIgnA31-5TTNIyp9D4LBwtXE,1222
-castor_extractor/utils/client/api/pagination.py,sha256=ph5TYqPiyFGgygsIhCATAHPIQ9UJNZyiTcqlyRdGEno,2460
+castor_extractor/utils/client/api/pagination.py,sha256=tNL89bvgnMJd0ajJA07wTTReH3PJOQm3xsa93SKHFss,2499
 castor_extractor/utils/client/api/pagination_test.py,sha256=jCOgXFXrH-jrCxe2dfk80ZksJF-EtmpJPU11BGabsqk,1385
 castor_extractor/utils/client/api/safe_request.py,sha256=5pvI2WPRDtitX9F1aYcXTIMPNmDikRK9dKTD3ctoeoQ,1774
 castor_extractor/utils/client/api/safe_request_test.py,sha256=LqS5FBxs6lLLcTkcgxIoLb6OinxShHXR5y4CWZpwmwg,2005
@@ -118,8 +116,8 @@ castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9
 castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
 castor_extractor/utils/client/uri.py,sha256=jmP9hY-6PRqdc3-vAOdtll_U6q9VCqSqmBAN6QRs3ZI,150
 castor_extractor/utils/client/uri_test.py,sha256=1XKF6qSseCeD4G4ckaNO07JXfGbt7XUVinOZdpEYrDQ,259
-castor_extractor/utils/collection.py,sha256=FiIJWZZ865oqNjtTm40gQ13R9zh--W2W5YsMBZJf2bk,2334
-castor_extractor/utils/collection_test.py,sha256=XJAGo0Veg0H8wZRCESIkU2t8bXxTNET0BdosomO3-Ls,2104
+castor_extractor/utils/collection.py,sha256=g2HmB0ievvYHWaZ8iEzkcPPkrBFsh6R6b_liBqcsMjc,3044
+castor_extractor/utils/collection_test.py,sha256=mlw33u4VidazQwWxJMvaFeYX3VB5CAj6rqRG-cRsLrw,2884
 castor_extractor/utils/constants.py,sha256=qBQprS9U66mS-RIBXiLujdTSV3WvGv40Bc0khP4Abdk,39
 castor_extractor/utils/deprecate.py,sha256=aBIN2QqZUx5CBNZMFfOUhi8QqtPqRcJtmrN6xqfm-y8,805
 castor_extractor/utils/env.py,sha256=TqdtB50U8LE0993WhhEhpy89TJrHbjtIKjvg6KQ-5q0,596
@@ -197,7 +195,7 @@ castor_extractor/visualization/looker_studio/client/endpoints.py,sha256=5eY-ffqN
 castor_extractor/visualization/looker_studio/client/enums.py,sha256=fHgemTaQpnwee8cw1YQVDsVnH--vTyFwT4Px8aVYYHQ,167
 castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=Phq378VEaFLD-nyP2_A1wge6HUP45jSthhlNjD7aqSg,4085
 castor_extractor/visualization/looker_studio/client/pagination.py,sha256=9HQ3Rkdiz2VB6AvYtZ0F-WouiD0pMmdZyAmkv-3wh08,783
-castor_extractor/visualization/looker_studio/client/queries/query.sql,sha256=Ub4rdrJ5WTPWKI-eVmXrNMv0Ktmti4b-93zZBr0xEB0,1426
+castor_extractor/visualization/looker_studio/client/queries/query.sql,sha256=p7fiXu5--BlY1FKnoW2CAQF7kPKjcN1tYf_SwvCZus4,1474
 castor_extractor/visualization/looker_studio/extract.py,sha256=NU48xQ83UtRW3jXKJcvofzqgEM2lHGjtTzjbKOSB50A,4059
 castor_extractor/visualization/looker_studio/extract_test.py,sha256=ZckAxUMuoEjJ9RWkfRvt9M8SxblkQvsq-Grb8GSs-y0,492
 castor_extractor/visualization/metabase/__init__.py,sha256=3E36cmkMyEgBB6Ot5rWk-N75i0G-7k24QTlc-Iol4pM,193
@@ -420,18 +418,18 @@ castor_extractor/warehouse/snowflake/queries/user.sql,sha256=88V8eRj1NDaD_ufclsK
 castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3N7BKkbXVM3RoIzqSDtohqRnE5kg4,673
 castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
 castor_extractor/warehouse/sqlserver/__init__.py,sha256=PdOuYznmvKAbfWAm8UdN47MfEsd9jqPi_dDi3WEo1KY,116
-castor_extractor/warehouse/sqlserver/client.py,sha256=pO5JNykET9luT2h9iAeI2jX-WBkWklXaYmmYIxQMm1o,1601
-castor_extractor/warehouse/sqlserver/extract.py,sha256=2mBNx9clyrhoiirD635BW-5u6pPoxHyIsB071XoZjho,2087
+castor_extractor/warehouse/sqlserver/client.py,sha256=Bjfpw96IKAQfWPiU5SZYEDfetwfkqZrnKbQYoStcnZc,2007
+castor_extractor/warehouse/sqlserver/extract.py,sha256=-LoHY5wAGJk4vutrO3N0_PaRqts7rkEn7pADRHzoxiI,2249
 castor_extractor/warehouse/sqlserver/queries/.sqlfluff,sha256=yy0KQdz8I_67vnXyX8eeWwOWkxTXvHyVKSVwhURktd8,48
-castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=Szdf8hwcDffRTgtD6zf4ZuIyHIVijFgSDk1rZbKI3g8,2480
-castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4eO6ck-smsDYValYMHLf1CTZu_zIqYycN77jqJH5H7E,106
-castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=elM9s02I9d9F5E4MHfJBfria5QT1hHycZHrn06wn9tg,535
-castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx_LGkIf7LX6ojTjI8wgJDxm3f0,2542
+castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=_K5OS63N7fM7kGPudnnjJEnIyaxR1xE2hoZgnJ_A3p8,2763
+castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4dPeBCn85MEOXr1f-DPXxiI3RvvoE_1n8lsbTs26E0I,150
+castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=UR3eTiYw7Iq5-GukelnNg_uq6haZ_dwg_SedZfOWUoA,619
+castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=4RgeSkHDWTWRyU2iLxaBR0KuSwIBvb3GbQGdkJYXbn0,2787
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
-castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
+castor_extractor/warehouse/sqlserver/query.py,sha256=7sW8cK3JzxPt6faTJ7e4lk9tE4fo_AeCymI-LqsSols,1276
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.33.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
-castor_extractor-0.24.33.dist-info/METADATA,sha256=vCEpwDM8sngoUEfrGtRPSjtCjTw6zxJGiJrnmj4eq_Y,26232
-castor_extractor-0.24.33.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-castor_extractor-0.24.33.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
-castor_extractor-0.24.33.dist-info/RECORD,,
+castor_extractor-0.24.35.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.35.dist-info/METADATA,sha256=-vrfKzS5B3r2qL7tjFjFBR-AizzuVIexEVJHCci7Z5s,26353
+castor_extractor-0.24.35.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.35.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
+castor_extractor-0.24.35.dist-info/RECORD,,
castor_extractor/transformation/coalesce/client/type.py DELETED
@@ -1 +0,0 @@
-NodeIDToNamesMapping = dict[str, set[str]]
castor_extractor/transformation/coalesce/client/utils.py DELETED
@@ -1,52 +0,0 @@
-from ....utils import SerializedAsset
-from .type import NodeIDToNamesMapping
-
-_NULL_SUFFIX = ": Null"
-_UNIQUE_SUFFIX = ": Unique"
-
-
-def is_test(
-    query_result: dict,
-    node_id: str,
-    test_names: NodeIDToNamesMapping,
-    column_names: NodeIDToNamesMapping,
-) -> bool:
-    """
-    checks whether a query result is a test result or not.
-
-    all this implementation can soon be replaced by checking whether
-    query_result['type'] == 'sqlTest', which should be GA Apr 28th 2025
-    """
-    # test scoped on the node (table)
-    result_name = query_result["name"]
-    if result_name in test_names.get(node_id, {}):
-        return True
-
-    # test scoped on the column
-    if result_name.endswith(_NULL_SUFFIX) or result_name.endswith(
-        _UNIQUE_SUFFIX
-    ):
-        column_name = result_name.split(":")[0]
-        if column_name in column_names.get(node_id, {}):
-            return True
-    return False
-
-
-def test_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
-    """mapping nodeID: set(testName)"""
-    mapping: dict[str, set[str]] = {}
-    for node in nodes:
-        node_id = node["id"]
-        tests = node.get("metadata", {}).get("appliedNodeTests", [])
-        mapping[node_id] = {test["name"] for test in tests}
-    return mapping
-
-
-def column_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
-    """mapping nodeID: set(columnNames)"""
-    mapping: dict[str, set[str]] = {}
-    for node in nodes:
-        node_id = node["id"]
-        columns = node.get("metadata", {}).get("columns", [])
-        mapping[node_id] = {column["name"] for column in columns}
-    return mapping
castor_extractor/transformation/coalesce/client/utils_test.py DELETED
@@ -1,54 +0,0 @@
-from .utils import is_test
-
-
-def test_is_test():
-    test_names = {"some-uuid": {"check-mirrors", "check-seatbelt"}}
-    column_names = {"some-uuid": {"carthago", "delenda", "est"}}
-
-    happy_node_test = is_test(
-        query_result={"name": "check-mirrors"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_node_test is True
-
-    unknown_node_test = is_test(
-        query_result={"name": "check-engine"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_node_test is False
-
-    happy_column_test_unique = is_test(
-        query_result={"name": "carthago: Unique"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_column_test_unique is True
-
-    happy_column_test_null = is_test(
-        query_result={"name": "carthago: Null"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_column_test_null is True
-
-    unknown_column_test = is_test(
-        query_result={"name": "rome: Unique"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_column_test is False
-
-    unknown_node_id_test = is_test(
-        query_result={"name": "whatever: Unique"},
-        node_id="unknown-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_node_id_test is False