castor-extractor 0.16.9__py3-none-any.whl → 0.16.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.16.11 - 2024-06-03
4
+
5
+ * Tableau: add extra fields to optimise storage
6
+
7
+ ## 0.16.10 - 2024-05-30
8
+
9
+ * Salesforce: extract sobjects Label as table name
10
+
3
11
  ## 0.16.9 - 2024-05-28
4
12
 
5
13
  * Tableau: extract only fields that are necessary
@@ -28,10 +28,13 @@ _TSC_ASSETS = (
28
28
  TableauRevampAsset.USAGE,
29
29
  )
30
30
 
31
- # speed up extraction: fields and columns are smaller but volumes are bigger
31
+ # increase the value when extraction is too slow
32
+ # decrease the value when timeouts arise
32
33
  _CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
34
+ # fields and columns are light but volumes are bigger
35
+ TableauRevampAsset.COLUMN: 200,
33
36
  TableauRevampAsset.FIELD: 1000,
34
- TableauRevampAsset.COLUMN: 1000,
37
+ TableauRevampAsset.TABLE: 50,
35
38
  }
36
39
 
37
40
 
@@ -18,7 +18,11 @@ QUERY_TEMPLATE = """
18
18
 
19
19
  _COLUMNS_QUERY = """
20
20
  downstreamDashboards { id }
21
- downstreamFields { id }
21
+ downstreamFields {
22
+ id
23
+ __typename
24
+ datasource { id }
25
+ }
22
26
  downstreamWorkbooks { id }
23
27
  id
24
28
  name
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Dict, Iterator, List
2
+ from typing import Dict, Iterator, List, Tuple
3
3
 
4
4
  from tqdm import tqdm # type: ignore
5
5
 
@@ -96,17 +96,19 @@ class SalesforceClient(SalesforceBaseClient):
96
96
  """
97
97
  sobjects = self.fetch_sobjects()
98
98
  logger.info(f"Extracted {len(sobjects)} sobjects")
99
- return self.formatter.tables(sobjects)
99
+ return list(self.formatter.tables(sobjects))
100
100
 
101
101
  def columns(
102
- self, sobject_names: List[str], show_progress: bool = True
102
+ self, sobject_names: List[Tuple[str, str]], show_progress: bool = True
103
103
  ) -> List[dict]:
104
104
  """
105
105
  Get salesforce sobject fields as columns
106
106
  show_progress: optionally deactivate the tqdm progress bar
107
107
  """
108
108
  sobject_fields: Dict[str, List[dict]] = dict()
109
- for sobject_name in tqdm(sobject_names, disable=not show_progress):
110
- fields = self.fetch_fields(sobject_name)
111
- sobject_fields[sobject_name] = fields
109
+ for api_name, table_name in tqdm(
110
+ sobject_names, disable=not show_progress
111
+ ):
112
+ fields = self.fetch_fields(api_name)
113
+ sobject_fields[table_name] = fields
112
114
  return self.formatter.columns(sobject_fields)
@@ -72,8 +72,8 @@ class SalesforceExtractionProcessor:
72
72
  catalog_locations[WarehouseAsset.TABLE.value] = location
73
73
  logger.info(f"Extracted {len(tables)} tables to {location}")
74
74
 
75
- table_names = [t["table_name"] for t in tables]
76
- columns = self._client.columns(table_names, show_progress)
75
+ sobject_names = [(t["api_name"], t["table_name"]) for t in tables]
76
+ columns = self._client.columns(sobject_names, show_progress)
77
77
  location = self._storage.put(WarehouseAsset.COLUMN.value, columns)
78
78
  catalog_locations[WarehouseAsset.COLUMN.value] = location
79
79
  logger.info(f"Extracted {len(columns)} columns to {location}")
@@ -1,4 +1,4 @@
1
- from typing import Any, Dict, List
1
+ from typing import Any, Dict, Iterator, List
2
2
 
3
3
  from .constants import SCHEMA_NAME
4
4
 
@@ -35,17 +35,35 @@ def _to_column_payload(field: dict, position: int, table_name: str) -> dict:
35
35
  }
36
36
 
37
37
 
38
- def _to_table_payload(table: dict) -> dict:
38
+ def _to_table_payload(sobject: dict, table_name: str) -> dict:
39
39
  return {
40
- "id": table["QualifiedApiName"],
40
+ "id": table_name,
41
+ "api_name": sobject["QualifiedApiName"],
42
+ "label": sobject["Label"],
41
43
  "schema_id": SCHEMA_NAME,
42
- "table_name": table["QualifiedApiName"],
44
+ "table_name": table_name,
43
45
  "description": "",
44
46
  "tags": [],
45
47
  "type": "TABLE",
46
48
  }
47
49
 
48
50
 
51
+ def _merge_label_and_api_name(sobject: dict) -> dict:
52
+ label = sobject["Label"]
53
+ api_name = sobject["QualifiedApiName"]
54
+ table_name = f"{label} ({api_name})"
55
+ return _to_table_payload(sobject, table_name)
56
+
57
+
58
+ def _by_label(sobjects: List[dict]) -> Dict[str, List[dict]]:
59
+ by_label: Dict[str, List[dict]] = dict()
60
+ for sobject in sobjects:
61
+ label = sobject["Label"]
62
+ similar_sobjects = by_label.setdefault(label, [])
63
+ similar_sobjects.append(sobject)
64
+ return by_label
65
+
66
+
49
67
  class SalesforceFormatter:
50
68
  """
51
69
  Helper functions that format the response in the format to be exported as
@@ -53,9 +71,18 @@ class SalesforceFormatter:
53
71
  """
54
72
 
55
73
  @staticmethod
56
- def tables(sobjects: List[dict]) -> List[dict]:
57
- """formats the raw list of sobjects to tables"""
58
- return [_to_table_payload(s) for s in sobjects]
74
+ def tables(sobjects: List[dict]) -> Iterator[dict]:
75
+ """
76
+ formats the raw list of sobjects to tables
77
+ if two tables share the same label, then we add the api name as well
78
+ """
79
+ by_label = _by_label(sobjects)
80
+ for label, similars in by_label.items():
81
+ if len(similars) > 1:
82
+ yield from [_merge_label_and_api_name(s) for s in similars]
83
+ else:
84
+ sobject = similars[0] # unique sobject on label
85
+ yield _to_table_payload(sobject, label)
59
86
 
60
87
  @staticmethod
61
88
  def columns(sobject_fields: Dict[str, List[dict]]) -> List[dict]:
@@ -1,4 +1,21 @@
1
- from .format import _field_description
1
+ from typing import Dict, Tuple
2
+
3
+ from .format import (
4
+ SCHEMA_NAME,
5
+ SalesforceFormatter,
6
+ _by_label,
7
+ _field_description,
8
+ _merge_label_and_api_name,
9
+ )
10
+
11
+
12
+ def _example_sobjects() -> Tuple[Dict[str, str], ...]:
13
+ """Returns 4 sobjects with 2 sharing the same label"""
14
+ a = {"Label": "a", "QualifiedApiName": "a_one"}
15
+ b = {"Label": "b", "QualifiedApiName": "b"}
16
+ c = {"Label": "c", "QualifiedApiName": "c"}
17
+ a_prime = {"Label": "a", "QualifiedApiName": "a_two"}
18
+ return a, b, c, a_prime
2
19
 
3
20
 
4
21
  def test__field_description():
@@ -30,3 +47,34 @@ def test__field_description():
30
47
  "- Data Sensitivity Level: bam"
31
48
  )
32
49
  assert description == expected
50
+
51
+
52
+ def test__merge_label_and_api_name():
53
+ sobject = {"Label": "foo", "QualifiedApiName": "bar"}
54
+ payload = _merge_label_and_api_name(sobject)
55
+ expected_name = "foo (bar)"
56
+ assert payload == {
57
+ "id": expected_name,
58
+ "api_name": "bar",
59
+ "label": "foo",
60
+ "schema_id": SCHEMA_NAME,
61
+ "table_name": expected_name,
62
+ "description": "",
63
+ "tags": [],
64
+ "type": "TABLE",
65
+ }
66
+
67
+
68
+ def test__by_label():
69
+ a, b, c, a_prime = _example_sobjects()
70
+ sobjects = [a, b, c, a_prime]
71
+ by_label = _by_label(sobjects)
72
+ assert by_label == {"a": [a, a_prime], "b": [b], "c": [c]}
73
+
74
+
75
+ def test_salesforce_formatter_tables():
76
+ sobjects = [*_example_sobjects()]
77
+ tables = SalesforceFormatter.tables(sobjects)
78
+ expected_names = {"a (a_one)", "a (a_two)", "b", "c"}
79
+ payload_names = {t["table_name"] for t in tables}
80
+ assert payload_names == expected_names
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.16.9
3
+ Version: 0.16.11
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=WwEWPQQuGqVnWLhPtEh3SuOlBrNgHyHcLsYuvahpN7E,10437
1
+ CHANGELOG.md,sha256=CuRENmJ6p4IM6b8vrmt6QI8uN8mX4a-FI_hJ4cQkPps,10588
2
2
  Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
3
3
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
4
4
  README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
@@ -244,10 +244,10 @@ castor_extractor/visualization/tableau/usage.py,sha256=LlFwlbEr-EnYUJjKZha99CRCR
244
244
  castor_extractor/visualization/tableau_revamp/__init__.py,sha256=a3DGjQhaz17gBqW-E84TAgupKbqLC40y5Ajo1yn-ot4,156
245
245
  castor_extractor/visualization/tableau_revamp/assets.py,sha256=owlwaI2E4UKk1YhkaHgaAXx6gu3Op6EqZ7bjp0tHI6s,351
246
246
  castor_extractor/visualization/tableau_revamp/client/__init__.py,sha256=wmS9uLtUiqNYVloi0-DgD8d2qzu3RVZEAtWiaDp6G_M,90
247
- castor_extractor/visualization/tableau_revamp/client/client.py,sha256=8BO7J-HFM2j6_f-Hjj3uSWip11eKeZ0cjhxGEqMTPRA,9428
247
+ castor_extractor/visualization/tableau_revamp/client/client.py,sha256=T7v84dnT97sFqVdzJdk1aOZ7S6U9u6d-j3KBqVj91eY,9532
248
248
  castor_extractor/visualization/tableau_revamp/client/credentials.py,sha256=fHG32egq6ll2U4BNazalMof_plzfCMQjrN9WOs6kezk,3014
249
249
  castor_extractor/visualization/tableau_revamp/client/errors.py,sha256=dTe1shqmWmAXpDpCz-E24m8dGYjt6rvIGV9qQb4jnvI,150
250
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=jBxvjQnOIWfFjMJpr7S_ZPnQhdzabxoO3jyEKi8A8ns,2112
250
+ castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=VP6xXi1mWKDGVnkWPLstLHqc3T4GVSnywyyoT6BJkFY,2153
251
251
  castor_extractor/visualization/tableau_revamp/client/tsc_fields.py,sha256=WsDliPCo-XsQ7wN-j0gpW9bdxCHvgH-aePywiltzfbU,688
252
252
  castor_extractor/visualization/tableau_revamp/constants.py,sha256=PcdudAogQhi3e-knalhgliMKjy5ahN0em_-7XSLrnxM,87
253
253
  castor_extractor/visualization/tableau_revamp/extract.py,sha256=2SLUxp5okM4AcEJJ61ZgcC2ikfZZl9MH17CEXMXmgl0,1450
@@ -323,11 +323,11 @@ castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6Wfw
323
323
  castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=Pkyh_QT6d4rhTeyiVcqw6O8CRl7NEhk2p7eM5YIn5kg,719
324
324
  castor_extractor/warehouse/redshift/query.py,sha256=0C81rkt2cpkWrJIxxwALDyqr-49vlqQM04y_N6wwStc,540
325
325
  castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
326
- castor_extractor/warehouse/salesforce/client.py,sha256=_XiQJJJfELKGmzuBv8Mr_C0FJ-oLg71KbvpehrGvJ_k,3842
326
+ castor_extractor/warehouse/salesforce/client.py,sha256=ETnZ3n-GFFH0XohDB2ft74wI1HMspvTefR3k7ne-pmI,3891
327
327
  castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
328
- castor_extractor/warehouse/salesforce/extract.py,sha256=ZTb58t7mqhavNvErrnw8M0L4Uu3qJpQEIldymurbgl0,3417
329
- castor_extractor/warehouse/salesforce/format.py,sha256=_BSj_G6C-kPwRubxSx1WuHg-_nYVQVNgAANqNfXL5RM,2154
330
- castor_extractor/warehouse/salesforce/format_test.py,sha256=6hy0USZH7-PDQt3oZ9_3Nwlr3eHLkqNEchqIM3bIDrU,858
328
+ castor_extractor/warehouse/salesforce/extract.py,sha256=IbhkCli8bSn7tjhRNlaD_HhfmZmv-5E5ajZfEUh68Hs,3438
329
+ castor_extractor/warehouse/salesforce/format.py,sha256=f5mMJyPsVU1ZSLe5WGCUOpj2SyW7_DFfzNVNu_m2aV0,3126
330
+ castor_extractor/warehouse/salesforce/format_test.py,sha256=HBlAYBoCOHaq_QOFudZlpcZb5TyZWV9v-cxK4tklg50,2253
331
331
  castor_extractor/warehouse/salesforce/soql.py,sha256=pAEaJE8ZUcyN3ptBsZGzNcGRhCcU81X6RMlnF1HRMw4,1063
332
332
  castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
333
333
  castor_extractor/warehouse/snowflake/client.py,sha256=XT0QLVNff_586SDuMe40iu8FCwPDh2uBV5aKc1Ql914,5555
@@ -368,8 +368,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
368
368
  castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
369
369
  castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
370
370
  castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
371
- castor_extractor-0.16.9.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
372
- castor_extractor-0.16.9.dist-info/METADATA,sha256=qRP78w8BztI4N8IyOLoESkFdhKWByXf7PQQjFLTvu6A,6582
373
- castor_extractor-0.16.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
374
- castor_extractor-0.16.9.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
375
- castor_extractor-0.16.9.dist-info/RECORD,,
371
+ castor_extractor-0.16.11.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
372
+ castor_extractor-0.16.11.dist-info/METADATA,sha256=sVbdD6MsgGVPxckw8tREx_xeajevgThiIkuU2IFYBaM,6583
373
+ castor_extractor-0.16.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
374
+ castor_extractor-0.16.11.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
375
+ castor_extractor-0.16.11.dist-info/RECORD,,