castor-extractor 0.16.9__py3-none-any.whl → 0.16.11__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +8 -0
- castor_extractor/visualization/tableau_revamp/client/client.py +5 -2
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py +5 -1
- castor_extractor/warehouse/salesforce/client.py +8 -6
- castor_extractor/warehouse/salesforce/extract.py +2 -2
- castor_extractor/warehouse/salesforce/format.py +34 -7
- castor_extractor/warehouse/salesforce/format_test.py +49 -1
- {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.11.dist-info}/METADATA +1 -1
- {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.11.dist-info}/RECORD +12 -12
- {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.11.dist-info}/LICENCE +0 -0
- {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.11.dist-info}/WHEEL +0 -0
- {castor_extractor-0.16.9.dist-info → castor_extractor-0.16.11.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.16.11 - 2024-06-03
|
|
4
|
+
|
|
5
|
+
* Tableau: add extra fields to optimise storage
|
|
6
|
+
|
|
7
|
+
## 0.16.10 - 2024-05-30
|
|
8
|
+
|
|
9
|
+
* Salesforce: extract sobjects Label as table name
|
|
10
|
+
|
|
3
11
|
## 0.16.9 - 2024-05-28
|
|
4
12
|
|
|
5
13
|
* Tableau: extract only fields that are necessary
|
|
@@ -28,10 +28,13 @@ _TSC_ASSETS = (
|
|
|
28
28
|
TableauRevampAsset.USAGE,
|
|
29
29
|
)
|
|
30
30
|
|
|
31
|
-
#
|
|
31
|
+
# increase the value when extraction is too slow
|
|
32
|
+
# decrease the value when timeouts arise
|
|
32
33
|
_CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
|
|
34
|
+
# fields and columns are light but volumes are bigger
|
|
35
|
+
TableauRevampAsset.COLUMN: 200,
|
|
33
36
|
TableauRevampAsset.FIELD: 1000,
|
|
34
|
-
TableauRevampAsset.
|
|
37
|
+
TableauRevampAsset.TABLE: 50,
|
|
35
38
|
}
|
|
36
39
|
|
|
37
40
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Dict, Iterator, List
|
|
2
|
+
from typing import Dict, Iterator, List, Tuple
|
|
3
3
|
|
|
4
4
|
from tqdm import tqdm # type: ignore
|
|
5
5
|
|
|
@@ -96,17 +96,19 @@ class SalesforceClient(SalesforceBaseClient):
|
|
|
96
96
|
"""
|
|
97
97
|
sobjects = self.fetch_sobjects()
|
|
98
98
|
logger.info(f"Extracted {len(sobjects)} sobjects")
|
|
99
|
-
return self.formatter.tables(sobjects)
|
|
99
|
+
return list(self.formatter.tables(sobjects))
|
|
100
100
|
|
|
101
101
|
def columns(
|
|
102
|
-
self, sobject_names: List[str], show_progress: bool = True
|
|
102
|
+
self, sobject_names: List[Tuple[str, str]], show_progress: bool = True
|
|
103
103
|
) -> List[dict]:
|
|
104
104
|
"""
|
|
105
105
|
Get salesforce sobject fields as columns
|
|
106
106
|
show_progress: optionally deactivate the tqdm progress bar
|
|
107
107
|
"""
|
|
108
108
|
sobject_fields: Dict[str, List[dict]] = dict()
|
|
109
|
-
for
|
|
110
|
-
|
|
111
|
-
|
|
109
|
+
for api_name, table_name in tqdm(
|
|
110
|
+
sobject_names, disable=not show_progress
|
|
111
|
+
):
|
|
112
|
+
fields = self.fetch_fields(api_name)
|
|
113
|
+
sobject_fields[table_name] = fields
|
|
112
114
|
return self.formatter.columns(sobject_fields)
|
|
@@ -72,8 +72,8 @@ class SalesforceExtractionProcessor:
|
|
|
72
72
|
catalog_locations[WarehouseAsset.TABLE.value] = location
|
|
73
73
|
logger.info(f"Extracted {len(tables)} tables to {location}")
|
|
74
74
|
|
|
75
|
-
|
|
76
|
-
columns = self._client.columns(
|
|
75
|
+
sobject_names = [(t["api_name"], t["table_name"]) for t in tables]
|
|
76
|
+
columns = self._client.columns(sobject_names, show_progress)
|
|
77
77
|
location = self._storage.put(WarehouseAsset.COLUMN.value, columns)
|
|
78
78
|
catalog_locations[WarehouseAsset.COLUMN.value] = location
|
|
79
79
|
logger.info(f"Extracted {len(columns)} columns to {location}")
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Dict, List
|
|
1
|
+
from typing import Any, Dict, Iterator, List
|
|
2
2
|
|
|
3
3
|
from .constants import SCHEMA_NAME
|
|
4
4
|
|
|
@@ -35,17 +35,35 @@ def _to_column_payload(field: dict, position: int, table_name: str) -> dict:
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
def _to_table_payload(
|
|
38
|
+
def _to_table_payload(sobject: dict, table_name: str) -> dict:
|
|
39
39
|
return {
|
|
40
|
-
"id":
|
|
40
|
+
"id": table_name,
|
|
41
|
+
"api_name": sobject["QualifiedApiName"],
|
|
42
|
+
"label": sobject["Label"],
|
|
41
43
|
"schema_id": SCHEMA_NAME,
|
|
42
|
-
"table_name":
|
|
44
|
+
"table_name": table_name,
|
|
43
45
|
"description": "",
|
|
44
46
|
"tags": [],
|
|
45
47
|
"type": "TABLE",
|
|
46
48
|
}
|
|
47
49
|
|
|
48
50
|
|
|
51
|
+
def _merge_label_and_api_name(sobject: dict) -> dict:
|
|
52
|
+
label = sobject["Label"]
|
|
53
|
+
api_name = sobject["QualifiedApiName"]
|
|
54
|
+
table_name = f"{label} ({api_name})"
|
|
55
|
+
return _to_table_payload(sobject, table_name)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _by_label(sobjects: List[dict]) -> Dict[str, List[dict]]:
|
|
59
|
+
by_label: Dict[str, List[dict]] = dict()
|
|
60
|
+
for sobject in sobjects:
|
|
61
|
+
label = sobject["Label"]
|
|
62
|
+
similar_sobjects = by_label.setdefault(label, [])
|
|
63
|
+
similar_sobjects.append(sobject)
|
|
64
|
+
return by_label
|
|
65
|
+
|
|
66
|
+
|
|
49
67
|
class SalesforceFormatter:
|
|
50
68
|
"""
|
|
51
69
|
Helper functions that format the response in the format to be exported as
|
|
@@ -53,9 +71,18 @@ class SalesforceFormatter:
|
|
|
53
71
|
"""
|
|
54
72
|
|
|
55
73
|
@staticmethod
|
|
56
|
-
def tables(sobjects: List[dict]) ->
|
|
57
|
-
"""
|
|
58
|
-
|
|
74
|
+
def tables(sobjects: List[dict]) -> Iterator[dict]:
|
|
75
|
+
"""
|
|
76
|
+
formats the raw list of sobjects to tables
|
|
77
|
+
if two tables share the same label, then we add the api name as well
|
|
78
|
+
"""
|
|
79
|
+
by_label = _by_label(sobjects)
|
|
80
|
+
for label, similars in by_label.items():
|
|
81
|
+
if len(similars) > 1:
|
|
82
|
+
yield from [_merge_label_and_api_name(s) for s in similars]
|
|
83
|
+
else:
|
|
84
|
+
sobject = similars[0] # unique sobject on label
|
|
85
|
+
yield _to_table_payload(sobject, label)
|
|
59
86
|
|
|
60
87
|
@staticmethod
|
|
61
88
|
def columns(sobject_fields: Dict[str, List[dict]]) -> List[dict]:
|
|
@@ -1,4 +1,21 @@
|
|
|
1
|
-
from
|
|
1
|
+
from typing import Dict, Tuple
|
|
2
|
+
|
|
3
|
+
from .format import (
|
|
4
|
+
SCHEMA_NAME,
|
|
5
|
+
SalesforceFormatter,
|
|
6
|
+
_by_label,
|
|
7
|
+
_field_description,
|
|
8
|
+
_merge_label_and_api_name,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _example_sobjects() -> Tuple[Dict[str, str], ...]:
|
|
13
|
+
"""Returns 4 sobjects with 2 sharing the same label"""
|
|
14
|
+
a = {"Label": "a", "QualifiedApiName": "a_one"}
|
|
15
|
+
b = {"Label": "b", "QualifiedApiName": "b"}
|
|
16
|
+
c = {"Label": "c", "QualifiedApiName": "c"}
|
|
17
|
+
a_prime = {"Label": "a", "QualifiedApiName": "a_two"}
|
|
18
|
+
return a, b, c, a_prime
|
|
2
19
|
|
|
3
20
|
|
|
4
21
|
def test__field_description():
|
|
@@ -30,3 +47,34 @@ def test__field_description():
|
|
|
30
47
|
"- Data Sensitivity Level: bam"
|
|
31
48
|
)
|
|
32
49
|
assert description == expected
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test__merge_label_and_api_name():
|
|
53
|
+
sobject = {"Label": "foo", "QualifiedApiName": "bar"}
|
|
54
|
+
payload = _merge_label_and_api_name(sobject)
|
|
55
|
+
expected_name = "foo (bar)"
|
|
56
|
+
assert payload == {
|
|
57
|
+
"id": expected_name,
|
|
58
|
+
"api_name": "bar",
|
|
59
|
+
"label": "foo",
|
|
60
|
+
"schema_id": SCHEMA_NAME,
|
|
61
|
+
"table_name": expected_name,
|
|
62
|
+
"description": "",
|
|
63
|
+
"tags": [],
|
|
64
|
+
"type": "TABLE",
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test__by_label():
|
|
69
|
+
a, b, c, a_prime = _example_sobjects()
|
|
70
|
+
sobjects = [a, b, c, a_prime]
|
|
71
|
+
by_label = _by_label(sobjects)
|
|
72
|
+
assert by_label == {"a": [a, a_prime], "b": [b], "c": [c]}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_salesforce_formatter_tables():
|
|
76
|
+
sobjects = [*_example_sobjects()]
|
|
77
|
+
tables = SalesforceFormatter.tables(sobjects)
|
|
78
|
+
expected_names = {"a (a_one)", "a (a_two)", "b", "c"}
|
|
79
|
+
payload_names = {t["table_name"] for t in tables}
|
|
80
|
+
assert payload_names == expected_names
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=CuRENmJ6p4IM6b8vrmt6QI8uN8mX4a-FI_hJ4cQkPps,10588
|
|
2
2
|
Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
|
|
3
3
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
4
4
|
README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
|
|
@@ -244,10 +244,10 @@ castor_extractor/visualization/tableau/usage.py,sha256=LlFwlbEr-EnYUJjKZha99CRCR
|
|
|
244
244
|
castor_extractor/visualization/tableau_revamp/__init__.py,sha256=a3DGjQhaz17gBqW-E84TAgupKbqLC40y5Ajo1yn-ot4,156
|
|
245
245
|
castor_extractor/visualization/tableau_revamp/assets.py,sha256=owlwaI2E4UKk1YhkaHgaAXx6gu3Op6EqZ7bjp0tHI6s,351
|
|
246
246
|
castor_extractor/visualization/tableau_revamp/client/__init__.py,sha256=wmS9uLtUiqNYVloi0-DgD8d2qzu3RVZEAtWiaDp6G_M,90
|
|
247
|
-
castor_extractor/visualization/tableau_revamp/client/client.py,sha256=
|
|
247
|
+
castor_extractor/visualization/tableau_revamp/client/client.py,sha256=T7v84dnT97sFqVdzJdk1aOZ7S6U9u6d-j3KBqVj91eY,9532
|
|
248
248
|
castor_extractor/visualization/tableau_revamp/client/credentials.py,sha256=fHG32egq6ll2U4BNazalMof_plzfCMQjrN9WOs6kezk,3014
|
|
249
249
|
castor_extractor/visualization/tableau_revamp/client/errors.py,sha256=dTe1shqmWmAXpDpCz-E24m8dGYjt6rvIGV9qQb4jnvI,150
|
|
250
|
-
castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=
|
|
250
|
+
castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=VP6xXi1mWKDGVnkWPLstLHqc3T4GVSnywyyoT6BJkFY,2153
|
|
251
251
|
castor_extractor/visualization/tableau_revamp/client/tsc_fields.py,sha256=WsDliPCo-XsQ7wN-j0gpW9bdxCHvgH-aePywiltzfbU,688
|
|
252
252
|
castor_extractor/visualization/tableau_revamp/constants.py,sha256=PcdudAogQhi3e-knalhgliMKjy5ahN0em_-7XSLrnxM,87
|
|
253
253
|
castor_extractor/visualization/tableau_revamp/extract.py,sha256=2SLUxp5okM4AcEJJ61ZgcC2ikfZZl9MH17CEXMXmgl0,1450
|
|
@@ -323,11 +323,11 @@ castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6Wfw
|
|
|
323
323
|
castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=Pkyh_QT6d4rhTeyiVcqw6O8CRl7NEhk2p7eM5YIn5kg,719
|
|
324
324
|
castor_extractor/warehouse/redshift/query.py,sha256=0C81rkt2cpkWrJIxxwALDyqr-49vlqQM04y_N6wwStc,540
|
|
325
325
|
castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
|
|
326
|
-
castor_extractor/warehouse/salesforce/client.py,sha256=
|
|
326
|
+
castor_extractor/warehouse/salesforce/client.py,sha256=ETnZ3n-GFFH0XohDB2ft74wI1HMspvTefR3k7ne-pmI,3891
|
|
327
327
|
castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
|
|
328
|
-
castor_extractor/warehouse/salesforce/extract.py,sha256=
|
|
329
|
-
castor_extractor/warehouse/salesforce/format.py,sha256=
|
|
330
|
-
castor_extractor/warehouse/salesforce/format_test.py,sha256=
|
|
328
|
+
castor_extractor/warehouse/salesforce/extract.py,sha256=IbhkCli8bSn7tjhRNlaD_HhfmZmv-5E5ajZfEUh68Hs,3438
|
|
329
|
+
castor_extractor/warehouse/salesforce/format.py,sha256=f5mMJyPsVU1ZSLe5WGCUOpj2SyW7_DFfzNVNu_m2aV0,3126
|
|
330
|
+
castor_extractor/warehouse/salesforce/format_test.py,sha256=HBlAYBoCOHaq_QOFudZlpcZb5TyZWV9v-cxK4tklg50,2253
|
|
331
331
|
castor_extractor/warehouse/salesforce/soql.py,sha256=pAEaJE8ZUcyN3ptBsZGzNcGRhCcU81X6RMlnF1HRMw4,1063
|
|
332
332
|
castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
|
|
333
333
|
castor_extractor/warehouse/snowflake/client.py,sha256=XT0QLVNff_586SDuMe40iu8FCwPDh2uBV5aKc1Ql914,5555
|
|
@@ -368,8 +368,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
|
|
|
368
368
|
castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
|
|
369
369
|
castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
|
|
370
370
|
castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
|
|
371
|
-
castor_extractor-0.16.
|
|
372
|
-
castor_extractor-0.16.
|
|
373
|
-
castor_extractor-0.16.
|
|
374
|
-
castor_extractor-0.16.
|
|
375
|
-
castor_extractor-0.16.
|
|
371
|
+
castor_extractor-0.16.11.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
372
|
+
castor_extractor-0.16.11.dist-info/METADATA,sha256=sVbdD6MsgGVPxckw8tREx_xeajevgThiIkuU2IFYBaM,6583
|
|
373
|
+
castor_extractor-0.16.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
374
|
+
castor_extractor-0.16.11.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
|
|
375
|
+
castor_extractor-0.16.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|