castor-extractor 0.16.6__py3-none-any.whl → 0.16.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of castor-extractor might be problematic.

Files changed (30)
  1. CHANGELOG.md +21 -0
  2. castor_extractor/utils/__init__.py +2 -1
  3. castor_extractor/utils/collection.py +32 -0
  4. castor_extractor/utils/collection_test.py +60 -0
  5. castor_extractor/utils/time.py +9 -1
  6. castor_extractor/utils/time_test.py +8 -1
  7. castor_extractor/visualization/domo/client/client.py +28 -43
  8. castor_extractor/visualization/domo/client/client_test.py +1 -23
  9. castor_extractor/visualization/domo/client/endpoints.py +13 -6
  10. castor_extractor/visualization/domo/client/pagination.py +4 -0
  11. castor_extractor/visualization/looker/api/client.py +21 -17
  12. castor_extractor/visualization/looker/api/sdk.py +10 -58
  13. castor_extractor/visualization/looker/api/utils.py +1 -1
  14. castor_extractor/visualization/looker/extract.py +2 -1
  15. castor_extractor/visualization/looker/multithreading.py +1 -1
  16. castor_extractor/visualization/tableau_revamp/client/client.py +79 -13
  17. castor_extractor/visualization/tableau_revamp/client/gql_queries.py +23 -16
  18. castor_extractor/visualization/tableau_revamp/client/tsc_fields.py +4 -0
  19. castor_extractor/warehouse/databricks/client.py +12 -5
  20. castor_extractor/warehouse/databricks/client_test.py +22 -3
  21. castor_extractor/warehouse/databricks/format.py +5 -1
  22. castor_extractor/warehouse/salesforce/client.py +8 -6
  23. castor_extractor/warehouse/salesforce/extract.py +2 -2
  24. castor_extractor/warehouse/salesforce/format.py +34 -7
  25. castor_extractor/warehouse/salesforce/format_test.py +49 -1
  26. {castor_extractor-0.16.6.dist-info → castor_extractor-0.16.11.dist-info}/METADATA +6 -3
  27. {castor_extractor-0.16.6.dist-info → castor_extractor-0.16.11.dist-info}/RECORD +30 -29
  28. {castor_extractor-0.16.6.dist-info → castor_extractor-0.16.11.dist-info}/LICENCE +0 -0
  29. {castor_extractor-0.16.6.dist-info → castor_extractor-0.16.11.dist-info}/WHEEL +0 -0
  30. {castor_extractor-0.16.6.dist-info → castor_extractor-0.16.11.dist-info}/entry_points.txt +0 -0
castor_extractor/visualization/tableau_revamp/client/client.py

@@ -2,6 +2,7 @@ import logging
 from typing import Dict, Iterator, List, Optional
 
 import tableauserverclient as TSC  # type: ignore
+from tableauserverclient import Pager
 
 from ....utils import SerializedAsset
 from ..assets import TableauRevampAsset
@@ -12,7 +13,7 @@ from ..constants import (
 )
 from .credentials import TableauRevampCredentials
 from .errors import TableauApiError
-from .gql_queries import GQL_QUERIES, QUERY_TEMPLATE
+from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
 from .tsc_fields import TSC_FIELDS
 
 logger = logging.getLogger(__name__)
@@ -27,13 +28,18 @@ _TSC_ASSETS = (
     TableauRevampAsset.USAGE,
 )
 
+# increase the value when extraction is too slow
+# decrease the value when timeouts arise
 _CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
+    # fields and columns are light but volumes are bigger
+    TableauRevampAsset.COLUMN: 200,
     TableauRevampAsset.FIELD: 1000,
+    TableauRevampAsset.TABLE: 50,
 }
 
 
 def _pick_fields(
-    data: SerializedAsset,
+    data: Pager,
     asset: TableauRevampAsset,
 ) -> SerializedAsset:
     fields = TSC_FIELDS[asset]
@@ -44,7 +50,7 @@ def _pick_fields(
     return [_pick(row) for row in data]
 
 
-def _enrich_with_tsc(
+def _enrich_datasources_with_tsc(
     datasources: SerializedAsset,
     tsc_datasources: SerializedAsset,
 ) -> SerializedAsset:
@@ -69,6 +75,32 @@ def _enrich_with_tsc(
     return datasources
 
 
+def _enrich_workbooks_with_tsc(
+    workbooks: SerializedAsset,
+    tsc_workbooks: SerializedAsset,
+) -> SerializedAsset:
+    """
+    Enrich workbooks with fields coming from TableauServerClient:
+    - project_luid
+    """
+
+    mapping = {row["id"]: row for row in tsc_workbooks}
+
+    for workbook in workbooks:
+        luid = workbook["luid"]
+        tsc_workbook = mapping.get(luid)
+        if not tsc_workbook:
+            # it happens that a workbook is in Metadata API but not in TSC
+            # in this case, we push the workbook with default project
+            logger.warning(f"Workbook {luid} was not found in TSC")
+            workbook["projectLuid"] = None
+            continue
+
+        workbook["projectLuid"] = tsc_workbook["project_id"]
+
+    return workbooks
+
+
 def gql_query_scroll(
     server,
     query: str,
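
The new _enrich_workbooks_with_tsc helper joins Metadata API workbooks to TSC rows on LUID, falling back to a null project when TSC lacks the workbook. A minimal standalone sketch of that join with toy data, assuming SerializedAsset is a plain list of dicts (as its usage above suggests):

from typing import List

SerializedAsset = List[dict]  # assumption: mirrors the alias imported from ....utils

def enrich_workbooks(
    workbooks: SerializedAsset, tsc_workbooks: SerializedAsset
) -> SerializedAsset:
    # index TSC rows by id so each workbook is matched in O(1)
    mapping = {row["id"]: row for row in tsc_workbooks}
    for workbook in workbooks:
        tsc_row = mapping.get(workbook["luid"])
        # a workbook known to the Metadata API may be missing from TSC;
        # it is then pushed with the default (None) project
        workbook["projectLuid"] = tsc_row["project_id"] if tsc_row else None
    return workbooks

print(enrich_workbooks(
    [{"luid": "wb-1"}, {"luid": "wb-2"}],
    [{"id": "wb-1", "project_id": "proj-9"}],
))
# [{'luid': 'wb-1', 'projectLuid': 'proj-9'}, {'luid': 'wb-2', 'projectLuid': None}]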
@@ -176,29 +208,32 @@ class TableauRevampClient:
         asset: TableauRevampAsset,
     ) -> SerializedAsset:
 
-        if asset == TableauRevampAsset.USER:
-            data = TSC.Pager(self._server.users)
+        if asset == TableauRevampAsset.DATASOURCE:
+            data = TSC.Pager(self._server.datasources)
 
         elif asset == TableauRevampAsset.PROJECT:
             data = TSC.Pager(self._server.projects)
 
-        elif asset == TableauRevampAsset.DATASOURCE:
-            data = TSC.Pager(self._server.datasources)
-
         elif asset == TableauRevampAsset.USAGE:
             data = TSC.Pager(self._server.views, usage=True)
 
+        elif asset == TableauRevampAsset.USER:
+            data = TSC.Pager(self._server.users)
+
+        elif asset == TableauRevampAsset.WORKBOOK:
+            data = TSC.Pager(self._server.workbooks)
+
         else:
             raise AssertionError(f"Fetching from TSC not supported for {asset}")
 
         return _pick_fields(data, asset)
 
-    def _fetch_from_metadata_api(
+    def _run_graphql_query(
         self,
-        asset: TableauRevampAsset,
+        resource: str,
+        fields: str,
+        page_size: int = DEFAULT_PAGE_SIZE,
     ) -> SerializedAsset:
-        resource, fields = GQL_QUERIES[asset]
-        page_size = _CUSTOM_PAGE_SIZE.get(asset) or DEFAULT_PAGE_SIZE
         query = QUERY_TEMPLATE.format(
             resource=resource,
             fields=fields,
@@ -207,13 +242,40 @@ class TableauRevampClient:
         result_pages = gql_query_scroll(self._server, query, resource)
         return [asset for page in result_pages for asset in page]
 
+    def _fetch_fields(self) -> SerializedAsset:
+        result: SerializedAsset = []
+        page_size = _CUSTOM_PAGE_SIZE[TableauRevampAsset.FIELD]
+        for resource, fields in FIELDS_QUERIES:
+            current = self._run_graphql_query(resource, fields, page_size)
+            result.extend(current)
+        return result
+
+    def _fetch_from_metadata_api(
+        self,
+        asset: TableauRevampAsset,
+    ) -> SerializedAsset:
+        if asset == TableauRevampAsset.FIELD:
+            return self._fetch_fields()
+
+        page_size = _CUSTOM_PAGE_SIZE.get(asset) or DEFAULT_PAGE_SIZE
+        resource, fields = GQL_QUERIES[asset]
+        return self._run_graphql_query(resource, fields, page_size)
+
     def _fetch_datasources(self) -> SerializedAsset:
         asset = TableauRevampAsset.DATASOURCE
 
         datasources = self._fetch_from_metadata_api(asset)
         datasource_projects = self._fetch_from_tsc(asset)
 
-        return _enrich_with_tsc(datasources, datasource_projects)
+        return _enrich_datasources_with_tsc(datasources, datasource_projects)
+
+    def _fetch_workbooks(self) -> SerializedAsset:
+        asset = TableauRevampAsset.WORKBOOK
+
+        workbooks = self._fetch_from_metadata_api(asset)
+        workbook_projects = self._fetch_from_tsc(asset)
+
+        return _enrich_workbooks_with_tsc(workbooks, workbook_projects)
 
     def fetch(
         self,
@@ -226,6 +288,10 @@ class TableauRevampClient:
             # both APIs are required to extract datasources
             return self._fetch_datasources()
 
+        if asset == TableauRevampAsset.WORKBOOK:
+            # both APIs are required to extract workbooks
+            return self._fetch_workbooks()
+
         if asset in _TSC_ASSETS:
             # some assets can only be extracted via TSC
             return self._fetch_from_tsc(asset)
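
gql_query_scroll (its body is outside this diff) yields one page of rows at a time, and _run_graphql_query flattens the pages with the comprehension shown above. A minimal sketch of that contract with a stubbed scroller:

from typing import Iterator, List

SerializedAsset = List[dict]

def gql_query_scroll_stub(pages: List[SerializedAsset]) -> Iterator[SerializedAsset]:
    # hypothetical stand-in for gql_query_scroll(server, query, resource):
    # the real function runs the GraphQL query and yields page after page
    yield from pages

pages = [[{"id": 1}, {"id": 2}], [{"id": 3}]]
# same flattening comprehension as in _run_graphql_query
flat = [asset for page in gql_query_scroll_stub(pages) for asset in page]
print(flat)  # [{'id': 1}, {'id': 2}, {'id': 3}]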
castor_extractor/visualization/tableau_revamp/client/gql_queries.py

@@ -18,7 +18,11 @@ QUERY_TEMPLATE = """
 
 _COLUMNS_QUERY = """
 downstreamDashboards { id }
-downstreamFields { id }
+downstreamFields {
+    id
+    __typename
+    datasource { id }
+}
 downstreamWorkbooks { id }
 id
 name
@@ -37,12 +41,10 @@ workbook { id }
 
 _DATASOURCES_QUERY = """
 __typename
-createdAt
 downstreamDashboards { id }
 downstreamWorkbooks { id }
 id
 name
-updatedAt
 ... on PublishedDatasource {
     description
     luid
@@ -64,7 +66,6 @@ name
     connectionType
     fullName
     schema
-    tableType
 }
 ... on CustomSQLTable {
     query
@@ -80,7 +81,6 @@ id
 luid
 name
 owner { luid }
-projectLuid
 site { name }
 tags { name }
 updatedAt
@@ -96,16 +96,17 @@ downstreamWorkbooks { id }
 folderName
 id
 name
-... on DataField {
-    dataType
-    role
-}
-... on ColumnField {
-    columns {
-        name
-        table { name }
-    }
-}
+dataType
+role
+"""
+
+
+_FIELDS_QUERY_WITH_COLUMNS = f"""
+{_FIELDS_QUERY}
+columns {{
+    name
+    table {{ name }}
+}}
 """
 
 _SHEETS_QUERY = """
@@ -124,8 +125,14 @@ GQL_QUERIES: Dict[TableauRevampAsset, Tuple[str, str]] = {
     TableauRevampAsset.COLUMN: ("columns", _COLUMNS_QUERY),
     TableauRevampAsset.DASHBOARD: ("dashboards", _DASHBOARDS_QUERY),
     TableauRevampAsset.DATASOURCE: ("datasources", _DATASOURCES_QUERY),
-    TableauRevampAsset.FIELD: ("fields", _FIELDS_QUERY),
     TableauRevampAsset.SHEET: ("sheets", _SHEETS_QUERY),
     TableauRevampAsset.TABLE: ("tables", _TABLES_QUERY),
     TableauRevampAsset.WORKBOOK: ("workbooks", _WORKBOOKS_QUERY),
 }
+
+FIELDS_QUERIES = (
+    ("binFields", _FIELDS_QUERY),
+    ("calculatedFields", _FIELDS_QUERY),
+    ("columnFields", _FIELDS_QUERY_WITH_COLUMNS),
+    ("groupFields", _FIELDS_QUERY),
+)
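
Because _FIELDS_QUERY_WITH_COLUMNS is an f-string, the literal GraphQL braces must be doubled ({{ and }}) while {_FIELDS_QUERY} interpolates. A quick standalone check, using only the tail of _FIELDS_QUERY visible in this diff:

# abridged: only the tail of _FIELDS_QUERY that appears in this diff
_FIELDS_QUERY = """
folderName
id
name
dataType
role
"""

_FIELDS_QUERY_WITH_COLUMNS = f"""
{_FIELDS_QUERY}
columns {{
    name
    table {{ name }}
}}
"""

# the doubled braces render as literal { and } in the GraphQL selection
print(_FIELDS_QUERY_WITH_COLUMNS)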
castor_extractor/visualization/tableau_revamp/client/tsc_fields.py

@@ -27,4 +27,8 @@ TSC_FIELDS: Dict[TableauRevampAsset, Set[str]] = {
         "name",
         "site_role",
     },
+    TableauRevampAsset.WORKBOOK: {
+        "id",
+        "project_id",
+    },
 }
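
TSC_FIELDS drives _pick_fields in client.py; the _pick helper itself is outside this diff, but presumably it projects each TSC model object onto the whitelisted attribute names. A hypothetical sketch of that projection:

from typing import Set

class FakeWorkbookItem:
    # toy stand-in for tableauserverclient's workbook model
    def __init__(self) -> None:
        self.id = "wb-1"
        self.project_id = "proj-9"
        self.name = "Sales"  # not in the whitelist below, so dropped

WORKBOOK_FIELDS: Set[str] = {"id", "project_id"}

def pick(row: object, fields: Set[str]) -> dict:
    # hypothetical equivalent of _pick: keep only whitelisted attributes
    return {field: getattr(row, field) for field in fields}

print(pick(FakeWorkbookItem(), WORKBOOK_FIELDS))
# {'id': 'wb-1', 'project_id': 'proj-9'} (key order follows set iteration)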
castor_extractor/warehouse/databricks/client.py

@@ -3,7 +3,7 @@ from datetime import date
 from functools import partial
 from typing import Any, Dict, List, Optional, Set
 
-from ...utils import at_midnight, date_after
+from ...utils import at_midnight, date_after, mapping_from_rows
 from ...utils.client.api import APIClient
 from ...utils.pager import PagerOnToken
 from ..abstract.time_filter import TimeFilter
@@ -88,15 +88,22 @@ class DatabricksClient(APIClient):
     )
 
     @staticmethod
-    def _match_table_with_user(table: dict, user_id_by_email: dict) -> dict:
+    def _match_table_with_user(table: dict, user_mapping: dict) -> dict:
         table_owner_email = table.get("owner_email")
         if not table_owner_email:
             return table
-        owner_external_id = user_id_by_email.get(table_owner_email)
+        owner_external_id = user_mapping.get(table_owner_email)
         if not owner_external_id:
             return table
         return {**table, "owner_external_id": owner_external_id}
 
+    @staticmethod
+    def _get_user_mapping(users: List[dict]) -> dict:
+        return {
+            **mapping_from_rows(users, "email", "id"),
+            **mapping_from_rows(users, "user_name", "id"),
+        }
+
     def tables_and_columns(
         self, schemas: List[dict], users: List[dict]
     ) -> TablesColumns:
@@ -105,11 +112,11 @@ class DatabricksClient(APIClient):
         """
         tables: List[dict] = []
         columns: List[dict] = []
-        user_id_by_email = {user.get("email"): user.get("id") for user in users}
+        user_mapping = self._get_user_mapping(users)
         for schema in schemas:
             t_to_add, c_to_add = self._tables_columns_of_schema(schema)
             t_with_owner = [
-                self._match_table_with_user(table, user_id_by_email)
+                self._match_table_with_user(table, user_mapping)
                 for table in t_to_add
             ]
             tables.extend(t_with_owner)
castor_extractor/warehouse/databricks/client_test.py

@@ -66,15 +66,34 @@ def test_DatabricksClient__keep_catalog():
     assert not client._keep_catalog("something_unknown")
 
 
+def test_DatabricksClient__get_user_mapping():
+    client = MockDatabricksClient()
+    users = [
+        {"id": "both", "email": "hello@world.com", "user_name": "hello world"},
+        {"id": "no_email", "email": "", "user_name": "no email"},
+        {"id": "no_name", "email": "no@name.fr", "user_name": ""},
+        {"id": "no_both", "email": "", "user_name": ""},
+        {"id": "", "email": "no@id.com", "user_name": "no id"},
+    ]
+    expected = {
+        "hello@world.com": "both",
+        "hello world": "both",
+        "no@name.fr": "no_name",
+        "no email": "no_email",
+    }
+    mapping = client._get_user_mapping(users)
+    assert mapping == expected
+
+
 def test_DatabricksClient__match_table_with_user():
     client = MockDatabricksClient()
-    users_by_email = {"bob@castordoc.com": 3}
+    user_mapping = {"bob@castordoc.com": 3}
 
     table = {"id": 1, "owner_email": "bob@castordoc.com"}
-    table_with_owner = client._match_table_with_user(table, users_by_email)
+    table_with_owner = client._match_table_with_user(table, user_mapping)
 
     assert table_with_owner == {**table, "owner_external_id": 3}
 
     table_without_owner = {"id": 1, "owner_email": None}
-    actual = client._match_table_with_user(table_without_owner, users_by_email)
+    actual = client._match_table_with_user(table_without_owner, user_mapping)
     assert actual == table_without_owner
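
mapping_from_rows is added in castor_extractor/utils/collection.py, which this diff lists but does not show. From the two call sites above and the test expectations, a plausible reimplementation (the skip-falsy behavior is inferred, not confirmed) is:

from typing import Dict, List

def mapping_from_rows(rows: List[dict], key: str, value: str) -> Dict:
    # hypothetical sketch: build {row[key]: row[value]},
    # skipping rows where either side is falsy (per the test expectations)
    return {
        row[key]: row[value]
        for row in rows
        if row.get(key) and row.get(value)
    }

users = [
    {"id": "both", "email": "hello@world.com", "user_name": "hello world"},
    {"id": "", "email": "no@id.com", "user_name": "no id"},  # dropped: empty id
]
mapping = {
    **mapping_from_rows(users, "email", "id"),
    **mapping_from_rows(users, "user_name", "id"),
}
print(mapping)  # {'hello@world.com': 'both', 'hello world': 'both'}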
castor_extractor/warehouse/databricks/format.py

@@ -127,13 +127,17 @@ class DatabricksFormatter:
                 return email["value"]
         return emails[0]["value"]
 
+    def _email(self, user: dict) -> Optional[str]:
+        emails = user.get("emails")
+        return self._primary(emails) if emails else None
+
     def format_user(self, raw_users: List[dict]) -> List[dict]:
         users = []
         for user in raw_users:
             users.append(
                 {
                     "id": user["id"],
-                    "email": self._primary(user["emails"]),
+                    "email": self._email(user),
                     "first_name": None,
                     "last_name": user.get("displayName") or user["userName"],
                     "user_name": user["userName"],
castor_extractor/warehouse/salesforce/client.py

@@ -1,5 +1,5 @@
 import logging
-from typing import Dict, Iterator, List
+from typing import Dict, Iterator, List, Tuple
 
 from tqdm import tqdm  # type: ignore
 
@@ -96,17 +96,19 @@ class SalesforceClient(SalesforceBaseClient):
         """
         sobjects = self.fetch_sobjects()
         logger.info(f"Extracted {len(sobjects)} sobjects")
-        return self.formatter.tables(sobjects)
+        return list(self.formatter.tables(sobjects))
 
     def columns(
-        self, sobject_names: List[str], show_progress: bool = True
+        self, sobject_names: List[Tuple[str, str]], show_progress: bool = True
     ) -> List[dict]:
         """
         Get salesforce sobject fields as columns
         show_progress: optionally deactivate the tqdm progress bar
         """
         sobject_fields: Dict[str, List[dict]] = dict()
-        for sobject_name in tqdm(sobject_names, disable=not show_progress):
-            fields = self.fetch_fields(sobject_name)
-            sobject_fields[sobject_name] = fields
+        for api_name, table_name in tqdm(
+            sobject_names, disable=not show_progress
+        ):
+            fields = self.fetch_fields(api_name)
+            sobject_fields[table_name] = fields
         return self.formatter.columns(sobject_fields)
castor_extractor/warehouse/salesforce/extract.py

@@ -72,8 +72,8 @@ class SalesforceExtractionProcessor:
         catalog_locations[WarehouseAsset.TABLE.value] = location
         logger.info(f"Extracted {len(tables)} tables to {location}")
 
-        table_names = [t["table_name"] for t in tables]
-        columns = self._client.columns(table_names, show_progress)
+        sobject_names = [(t["api_name"], t["table_name"]) for t in tables]
+        columns = self._client.columns(sobject_names, show_progress)
         location = self._storage.put(WarehouseAsset.COLUMN.value, columns)
         catalog_locations[WarehouseAsset.COLUMN.value] = location
         logger.info(f"Extracted {len(columns)} columns to {location}")
castor_extractor/warehouse/salesforce/format.py

@@ -1,4 +1,4 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, Iterator, List
 
 from .constants import SCHEMA_NAME
 
@@ -35,17 +35,35 @@ def _to_column_payload(field: dict, position: int, table_name: str) -> dict:
     }
 
 
-def _to_table_payload(table: dict) -> dict:
+def _to_table_payload(sobject: dict, table_name: str) -> dict:
     return {
-        "id": table["QualifiedApiName"],
+        "id": table_name,
+        "api_name": sobject["QualifiedApiName"],
+        "label": sobject["Label"],
         "schema_id": SCHEMA_NAME,
-        "table_name": table["QualifiedApiName"],
+        "table_name": table_name,
         "description": "",
         "tags": [],
         "type": "TABLE",
     }
 
 
+def _merge_label_and_api_name(sobject: dict) -> dict:
+    label = sobject["Label"]
+    api_name = sobject["QualifiedApiName"]
+    table_name = f"{label} ({api_name})"
+    return _to_table_payload(sobject, table_name)
+
+
+def _by_label(sobjects: List[dict]) -> Dict[str, List[dict]]:
+    by_label: Dict[str, List[dict]] = dict()
+    for sobject in sobjects:
+        label = sobject["Label"]
+        similar_sobjects = by_label.setdefault(label, [])
+        similar_sobjects.append(sobject)
+    return by_label
+
+
 class SalesforceFormatter:
     """
     Helper functions that format the response in the format to be exported as
@@ -53,9 +71,18 @@ class SalesforceFormatter:
     """
 
     @staticmethod
-    def tables(sobjects: List[dict]) -> List[dict]:
-        """formats the raw list of sobjects to tables"""
-        return [_to_table_payload(s) for s in sobjects]
+    def tables(sobjects: List[dict]) -> Iterator[dict]:
+        """
+        formats the raw list of sobjects to tables
+        if two tables share the same label, then we add the api name as well
+        """
+        by_label = _by_label(sobjects)
+        for label, similars in by_label.items():
+            if len(similars) > 1:
+                yield from [_merge_label_and_api_name(s) for s in similars]
+            else:
+                sobject = similars[0]  # unique sobject on label
+                yield _to_table_payload(sobject, label)
 
     @staticmethod
     def columns(sobject_fields: Dict[str, List[dict]]) -> List[dict]:
castor_extractor/warehouse/salesforce/format_test.py

@@ -1,4 +1,21 @@
-from .format import _field_description
+from typing import Dict, Tuple
+
+from .format import (
+    SCHEMA_NAME,
+    SalesforceFormatter,
+    _by_label,
+    _field_description,
+    _merge_label_and_api_name,
+)
+
+
+def _example_sobjects() -> Tuple[Dict[str, str], ...]:
+    """Returns 4 sobjects with 2 sharing the same label"""
+    a = {"Label": "a", "QualifiedApiName": "a_one"}
+    b = {"Label": "b", "QualifiedApiName": "b"}
+    c = {"Label": "c", "QualifiedApiName": "c"}
+    a_prime = {"Label": "a", "QualifiedApiName": "a_two"}
+    return a, b, c, a_prime
 
 
 def test__field_description():
@@ -30,3 +47,34 @@ def test__field_description():
         "- Data Sensitivity Level: bam"
     )
     assert description == expected
+
+
+def test__merge_label_and_api_name():
+    sobject = {"Label": "foo", "QualifiedApiName": "bar"}
+    payload = _merge_label_and_api_name(sobject)
+    expected_name = "foo (bar)"
+    assert payload == {
+        "id": expected_name,
+        "api_name": "bar",
+        "label": "foo",
+        "schema_id": SCHEMA_NAME,
+        "table_name": expected_name,
+        "description": "",
+        "tags": [],
+        "type": "TABLE",
+    }
+
+
+def test__by_label():
+    a, b, c, a_prime = _example_sobjects()
+    sobjects = [a, b, c, a_prime]
+    by_label = _by_label(sobjects)
+    assert by_label == {"a": [a, a_prime], "b": [b], "c": [c]}
+
+
+def test_salesforce_formatter_tables():
+    sobjects = [*_example_sobjects()]
+    tables = SalesforceFormatter.tables(sobjects)
+    expected_names = {"a (a_one)", "a (a_two)", "b", "c"}
+    payload_names = {t["table_name"] for t in tables}
+    assert payload_names == expected_names
{castor_extractor-0.16.6.dist-info → castor_extractor-0.16.11.dist-info}/METADATA

@@ -1,12 +1,12 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.16.6
+Version: 0.16.11
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
 Author: Castor
 Author-email: support@castordoc.com
-Requires-Python: >=3.8,<3.12
+Requires-Python: >=3.8,<3.13
 Classifier: License :: Other/Proprietary License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
@@ -14,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Provides-Extra: all
 Provides-Extra: bigquery
 Provides-Extra: dbt
@@ -34,8 +35,10 @@ Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
 Requires-Dist: google-cloud-storage (>=2,<3)
 Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
 Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
-Requires-Dist: looker-sdk (>=22.4.0,<=23.0.0) ; extra == "looker" or extra == "all"
+Requires-Dist: looker-sdk (>=23.0.0) ; extra == "looker" or extra == "all"
 Requires-Dist: msal (>=1.20.0,<2.0.0) ; extra == "powerbi" or extra == "all"
+Requires-Dist: numpy (<1.25) ; python_version >= "3.8" and python_version < "3.9"
+Requires-Dist: numpy (>=1.26,<2) ; python_version >= "3.12" and python_version < "3.13"
 Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0) ; extra == "metabase" or extra == "postgres" or extra == "redshift" or extra == "all"
 Requires-Dist: pycryptodome (>=3.0.0,<4.0.0) ; extra == "metabase" or extra == "all"
 Requires-Dist: pydantic (>=2.6,<3.0)