castor-extractor 0.20.0__py3-none-any.whl → 0.20.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +16 -0
- castor_extractor/commands/extract_thoughtspot.py +18 -0
- castor_extractor/utils/client/api/client.py +7 -2
- castor_extractor/utils/client/api/safe_request.py +6 -3
- castor_extractor/visualization/looker/api/constants.py +0 -4
- castor_extractor/visualization/powerbi/__init__.py +1 -1
- castor_extractor/visualization/powerbi/assets.py +7 -1
- castor_extractor/visualization/powerbi/client/__init__.py +2 -3
- castor_extractor/visualization/powerbi/client/authentication.py +27 -0
- castor_extractor/visualization/powerbi/client/client.py +207 -0
- castor_extractor/visualization/powerbi/client/client_test.py +173 -0
- castor_extractor/visualization/powerbi/client/constants.py +0 -67
- castor_extractor/visualization/powerbi/client/credentials.py +3 -4
- castor_extractor/visualization/powerbi/client/credentials_test.py +3 -4
- castor_extractor/visualization/powerbi/client/endpoints.py +65 -0
- castor_extractor/visualization/powerbi/client/pagination.py +32 -0
- castor_extractor/visualization/powerbi/extract.py +14 -9
- castor_extractor/visualization/thoughtspot/__init__.py +3 -0
- castor_extractor/visualization/thoughtspot/assets.py +9 -0
- castor_extractor/visualization/thoughtspot/client/__init__.py +2 -0
- castor_extractor/visualization/thoughtspot/client/client.py +120 -0
- castor_extractor/visualization/thoughtspot/client/credentials.py +18 -0
- castor_extractor/visualization/thoughtspot/client/endpoints.py +12 -0
- castor_extractor/visualization/thoughtspot/client/utils.py +25 -0
- castor_extractor/visualization/thoughtspot/client/utils_test.py +57 -0
- castor_extractor/visualization/thoughtspot/extract.py +49 -0
- castor_extractor/warehouse/salesforce/client.py +1 -1
- castor_extractor/warehouse/salesforce/format.py +40 -30
- castor_extractor/warehouse/salesforce/format_test.py +61 -24
- {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/METADATA +17 -1
- {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/RECORD +34 -23
- {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/entry_points.txt +1 -0
- castor_extractor/visualization/powerbi/client/rest.py +0 -305
- castor_extractor/visualization/powerbi/client/rest_test.py +0 -290
- castor_extractor/visualization/powerbi/client/utils.py +0 -19
- castor_extractor/visualization/powerbi/client/utils_test.py +0 -24
- {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/LICENCE +0 -0
- {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/WHEEL +0 -0
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
from typing import Dict, Tuple
|
|
1
|
+
from typing import Dict, List, Tuple
|
|
2
2
|
|
|
3
3
|
from .format import (
|
|
4
|
-
|
|
4
|
+
_HAS_DUPLICATE_KEY,
|
|
5
5
|
SalesforceFormatter,
|
|
6
|
-
|
|
6
|
+
_detect_duplicates,
|
|
7
7
|
_field_description,
|
|
8
|
-
|
|
8
|
+
_name,
|
|
9
9
|
)
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def _example_sobjects() -> Tuple[Dict[str, str], ...]:
|
|
12
|
+
def _tables_sobjects() -> Tuple[Dict[str, str], ...]:
|
|
13
13
|
"""Returns 4 sobjects with 2 sharing the same label"""
|
|
14
14
|
a = {"Label": "a", "QualifiedApiName": "a_one"}
|
|
15
15
|
b = {"Label": "b", "QualifiedApiName": "b"}
|
|
@@ -18,6 +18,16 @@ def _example_sobjects() -> Tuple[Dict[str, str], ...]:
|
|
|
18
18
|
return a, b, c, a_prime
|
|
19
19
|
|
|
20
20
|
|
|
21
|
+
def _columns_sobjects() -> Dict[str, List[dict]]:
|
|
22
|
+
a = {"Label": "First Name", "QualifiedApiName": "owner_name"}
|
|
23
|
+
b = {"Label": "First Name", "QualifiedApiName": "editor_name"}
|
|
24
|
+
c = {"Label": "Foo Bar", "QualifiedApiName": "foo_bar"}
|
|
25
|
+
return {
|
|
26
|
+
"table_1": [a, b],
|
|
27
|
+
"table_2": [c],
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
|
|
21
31
|
def test__field_description():
|
|
22
32
|
field = {}
|
|
23
33
|
assert _field_description(field) == ""
|
|
@@ -48,32 +58,59 @@ def test__field_description():
|
|
|
48
58
|
assert description == expected
|
|
49
59
|
|
|
50
60
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
"id": expected_name,
|
|
57
|
-
"api_name": "bar",
|
|
58
|
-
"label": "foo",
|
|
59
|
-
"schema_id": SCHEMA_NAME,
|
|
60
|
-
"table_name": expected_name,
|
|
61
|
-
"description": None,
|
|
62
|
-
"tags": [],
|
|
63
|
-
"type": "TABLE",
|
|
61
|
+
def test__name():
|
|
62
|
+
unique_sobject = {
|
|
63
|
+
"Label": "First Name",
|
|
64
|
+
"QualifiedApiName": "first_name",
|
|
65
|
+
_HAS_DUPLICATE_KEY: False,
|
|
64
66
|
}
|
|
67
|
+
assert _name(unique_sobject) == "First Name"
|
|
65
68
|
|
|
69
|
+
duplicate_sobject = {
|
|
70
|
+
"Label": "First Name",
|
|
71
|
+
"QualifiedApiName": "first_name",
|
|
72
|
+
_HAS_DUPLICATE_KEY: True,
|
|
73
|
+
}
|
|
74
|
+
assert _name(duplicate_sobject) == "First Name (first_name)"
|
|
75
|
+
|
|
76
|
+
empty_label_sobject = {
|
|
77
|
+
"Label": "",
|
|
78
|
+
"QualifiedApiName": "empty_label",
|
|
79
|
+
_HAS_DUPLICATE_KEY: False,
|
|
80
|
+
}
|
|
81
|
+
assert _name(empty_label_sobject) == "empty_label"
|
|
66
82
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
83
|
+
|
|
84
|
+
def test__detect_duplicates():
|
|
85
|
+
objects = [
|
|
86
|
+
{"Label": "Foo"},
|
|
87
|
+
{"Label": "Bar"},
|
|
88
|
+
{"Label": "Foo"},
|
|
89
|
+
]
|
|
90
|
+
|
|
91
|
+
objects = _detect_duplicates(objects)
|
|
92
|
+
assert objects == [
|
|
93
|
+
{"Label": "Foo", _HAS_DUPLICATE_KEY: True},
|
|
94
|
+
{"Label": "Bar", _HAS_DUPLICATE_KEY: False},
|
|
95
|
+
{"Label": "Foo", _HAS_DUPLICATE_KEY: True},
|
|
96
|
+
]
|
|
72
97
|
|
|
73
98
|
|
|
74
99
|
def test_salesforce_formatter_tables():
|
|
75
|
-
sobjects = [*_example_sobjects()]
|
|
100
|
+
sobjects = [*_tables_sobjects()]
|
|
76
101
|
tables = SalesforceFormatter.tables(sobjects)
|
|
77
102
|
expected_names = {"a (a_one)", "a (a_two)", "b", "c"}
|
|
78
103
|
payload_names = {t["table_name"] for t in tables}
|
|
79
104
|
assert payload_names == expected_names
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_salesforce_formatter_columns():
|
|
108
|
+
sobjects = _columns_sobjects()
|
|
109
|
+
columns = SalesforceFormatter.columns(sobjects)
|
|
110
|
+
column_ids = {c["id"] for c in columns}
|
|
111
|
+
expected_column_ids = {
|
|
112
|
+
"table_1.First Name (owner_name)",
|
|
113
|
+
"table_1.First Name (editor_name)",
|
|
114
|
+
"table_2.Foo Bar",
|
|
115
|
+
}
|
|
116
|
+
assert column_ids == expected_column_ids
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.20.0
|
|
3
|
+
Version: 0.20.4
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -208,6 +208,22 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
|
|
|
208
208
|
|
|
209
209
|
# Changelog
|
|
210
210
|
|
|
211
|
+
## 0.20.4 - 2024-10-09
|
|
212
|
+
|
|
213
|
+
* Salesforce warehouse: `Labels` instead of `api_names` for columns
|
|
214
|
+
|
|
215
|
+
## 0.20.3 - 2024-10-03
|
|
216
|
+
|
|
217
|
+
* Looker: no longer extract `as_html` dashboard elements
|
|
218
|
+
|
|
219
|
+
## 0.20.2 - 2024-09-24
|
|
220
|
+
|
|
221
|
+
* Thoughtspot: Adding connector
|
|
222
|
+
|
|
223
|
+
## 0.20.1 - 2024-09-23
|
|
224
|
+
|
|
225
|
+
* Power BI: Improved client based on APIClient
|
|
226
|
+
|
|
211
227
|
## 0.20.0 - 2024-09-23
|
|
212
228
|
|
|
213
229
|
* Switch to Tableau revamped connector
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=CzVaQbFAS2hlZE2ak7DTYHWBNjMaC59e8UK7Q9p10tw,13905
|
|
2
2
|
Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -24,6 +24,7 @@ castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_
|
|
|
24
24
|
castor_extractor/commands/extract_snowflake.py,sha256=vYiruxRoo--GeMemOGsSE1w9kcKTh_y4E165HtMVzkM,1982
|
|
25
25
|
castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
|
|
26
26
|
castor_extractor/commands/extract_tableau.py,sha256=VUb_1Y85EzfF1f9OaCQQt8kFYBdp0u31Mw1Wm2fkxWs,1221
|
|
27
|
+
castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
|
|
27
28
|
castor_extractor/commands/file_check.py,sha256=VSD84kpQKf7b0wJOhUgkJQ9n4mK3v52sjMWL7wkNYa0,2667
|
|
28
29
|
castor_extractor/commands/upload.py,sha256=WLDI3zDmK2CjtbxiMWX2mZGjxx8DozfCw6tLE3CAMcE,1833
|
|
29
30
|
castor_extractor/file_checker/__init__.py,sha256=OSt6YLhUT42U_Cp3LCLHMVruwDkksL75Ij13X2UPnVk,119
|
|
@@ -73,11 +74,11 @@ castor_extractor/utils/client/abstract.py,sha256=aA5Qcb9TwWDSMq8WpXbGkOB20hehwX2
|
|
|
73
74
|
castor_extractor/utils/client/api/__init__.py,sha256=vlG7WXznYgLTn3XyMGsyUkgRkup8FbKM14EXJ8mv-b0,264
|
|
74
75
|
castor_extractor/utils/client/api/auth.py,sha256=QDLM5h1zGibLaKyATxLF0gycg01SE92G-Y69f_YBClc,1896
|
|
75
76
|
castor_extractor/utils/client/api/auth_test.py,sha256=NoZYsz7bcCyWBZdMF1TaOuK-s1j09DhTRyM4GSUW_YQ,1311
|
|
76
|
-
castor_extractor/utils/client/api/client.py,sha256=
|
|
77
|
+
castor_extractor/utils/client/api/client.py,sha256=_XkOMbkit0_YYJ2tY1rYrEuoPTUP826CJPnI9OLgmuQ,4434
|
|
77
78
|
castor_extractor/utils/client/api/client_test.py,sha256=FM3ZxsLLfMOBn44cXX6FIgnA31-5TTNIyp9D4LBwtXE,1222
|
|
78
79
|
castor_extractor/utils/client/api/pagination.py,sha256=Efg3P9ct_U5rtgXijMGV05oQxSzjldEopECWjIFWerM,2439
|
|
79
80
|
castor_extractor/utils/client/api/pagination_test.py,sha256=jCOgXFXrH-jrCxe2dfk80ZksJF-EtmpJPU11BGabsqk,1385
|
|
80
|
-
castor_extractor/utils/client/api/safe_request.py,sha256=
|
|
81
|
+
castor_extractor/utils/client/api/safe_request.py,sha256=dh69Uv9LMUGKDnxDnBYEYMEdTWq2GHLWD3ZwXtQP3So,1787
|
|
81
82
|
castor_extractor/utils/client/api/safe_request_test.py,sha256=LqS5FBxs6lLLcTkcgxIoLb6OinxShHXR5y4CWZpwmwg,2005
|
|
82
83
|
castor_extractor/utils/client/api/utils.py,sha256=jr8CWf48cIp8QP1P7oZ1zg9WaGlDO3mqCWgQKdEcpyc,238
|
|
83
84
|
castor_extractor/utils/client/api/utils_test.py,sha256=a5aL-pCwa74C8Ne7OT169Bjp8WPDV5Fl8MxNxAllHJg,514
|
|
@@ -144,7 +145,7 @@ castor_extractor/visualization/looker/__init__.py,sha256=mem0020YeP4_5zDnqRXOW3g
|
|
|
144
145
|
castor_extractor/visualization/looker/api/__init__.py,sha256=HDLsLy3kDWHIplAzLl1_u_bvGlgY6cuplf8myJTdfTg,169
|
|
145
146
|
castor_extractor/visualization/looker/api/client.py,sha256=xp7wV59UsrXQXrkR-vB9YH78aPu2rAfwNRFxXegJluo,11283
|
|
146
147
|
castor_extractor/visualization/looker/api/client_test.py,sha256=a80DpBOorFumXEA3D_qHuRZJqR51-DUtbz65XSLuSHc,1977
|
|
147
|
-
castor_extractor/visualization/looker/api/constants.py,sha256=
|
|
148
|
+
castor_extractor/visualization/looker/api/constants.py,sha256=wnpEtZNbvTKEsLRCSdDUOru2Y6uIFyrBt1e5Hp9T7J4,4021
|
|
148
149
|
castor_extractor/visualization/looker/api/credentials.py,sha256=dnEMW-d-g4N_JJhkXd-CJcnKLA1zBNMbgnELL_-guNI,972
|
|
149
150
|
castor_extractor/visualization/looker/api/extraction_parameters.py,sha256=53tMtYHxlgALWuKr9w-lOE0xHIqKLvIQHl4w5wufjbU,1284
|
|
150
151
|
castor_extractor/visualization/looker/api/sdk.py,sha256=KEhVCpQ__K9yTxSoIDG5y1FFuAhmeHo65pvxh7g90Ts,1600
|
|
@@ -192,17 +193,18 @@ castor_extractor/visualization/mode/client/constants.py,sha256=_Si5AF6VnpoSfnNNg
|
|
|
192
193
|
castor_extractor/visualization/mode/client/credentials.py,sha256=ptIpCCpoNt06yYaWQgl3Xu78_jVMoqsqWAGqQXVFZlo,606
|
|
193
194
|
castor_extractor/visualization/mode/errors.py,sha256=SKpFT2AiLOuWx2VRLyO7jbAiKcGDFXXrsebpNEKtr0E,1495
|
|
194
195
|
castor_extractor/visualization/mode/extract.py,sha256=g_X7k8L8MldFPbuwOrnyNMF3BEH1r-IAAgNmi3KLF-U,1623
|
|
195
|
-
castor_extractor/visualization/powerbi/__init__.py,sha256=
|
|
196
|
-
castor_extractor/visualization/powerbi/assets.py,sha256=
|
|
197
|
-
castor_extractor/visualization/powerbi/client/__init__.py,sha256=
|
|
198
|
-
castor_extractor/visualization/powerbi/client/
|
|
199
|
-
castor_extractor/visualization/powerbi/client/
|
|
200
|
-
castor_extractor/visualization/powerbi/client/
|
|
201
|
-
castor_extractor/visualization/powerbi/client/
|
|
202
|
-
castor_extractor/visualization/powerbi/client/
|
|
203
|
-
castor_extractor/visualization/powerbi/client/
|
|
204
|
-
castor_extractor/visualization/powerbi/client/
|
|
205
|
-
castor_extractor/visualization/powerbi/
|
|
196
|
+
castor_extractor/visualization/powerbi/__init__.py,sha256=AJnmfdmm2mGaInWJkUfZxRqrI7dBkTUSebpow05g5zo,135
|
|
197
|
+
castor_extractor/visualization/powerbi/assets.py,sha256=4VtYLgY81yQ3WzOEDipyK4zkS4xrIY9wjJBO1CeLpb4,932
|
|
198
|
+
castor_extractor/visualization/powerbi/client/__init__.py,sha256=8Bzhd9Z0ebVg2gDchXCOPa80Yqlq_9oCjbGi8u1M6J0,93
|
|
199
|
+
castor_extractor/visualization/powerbi/client/authentication.py,sha256=fz0v9qxeADwA1jiS9UzAQN5mA5kmZT53onlcWon2RGw,892
|
|
200
|
+
castor_extractor/visualization/powerbi/client/client.py,sha256=BDV0m00baYJLK4AyAP-TJ7rp2iEOk0ZsrPSIoSviEHI,7188
|
|
201
|
+
castor_extractor/visualization/powerbi/client/client_test.py,sha256=6NtpcKZCxBWyJO3phnVgE70Wmunb6tWsdXikkReJ02E,5539
|
|
202
|
+
castor_extractor/visualization/powerbi/client/constants.py,sha256=88R_aGachNNUZh6OSH2fkDwZtY4KTStzKm_g7HNCqqo,387
|
|
203
|
+
castor_extractor/visualization/powerbi/client/credentials.py,sha256=ueJ6AySVuigwGxIeQ7tGD2nh0UV1PnhKIkCCqFDvGBw,801
|
|
204
|
+
castor_extractor/visualization/powerbi/client/credentials_test.py,sha256=TzFqxsWVQ3sXR_n0bJsexK9Uz7ceXCEPVqDGWTJzW60,993
|
|
205
|
+
castor_extractor/visualization/powerbi/client/endpoints.py,sha256=WG1JRJ4FznUOQaUCWzjBqRnHNxQKv1-b2ZAnQVwQEVg,2058
|
|
206
|
+
castor_extractor/visualization/powerbi/client/pagination.py,sha256=OZMjoDQPRGMoWd9QcKKrPh3aErJR20SHlrTqY_siLkk,755
|
|
207
|
+
castor_extractor/visualization/powerbi/extract.py,sha256=0YCNSeTqcXSBbrl9g5dJUv8oMm7r-NT8tcfB-IdgPo8,1333
|
|
206
208
|
castor_extractor/visualization/qlik/__init__.py,sha256=u6lIfm_WOykBwt6SlaB7C0Dtx37XBliUbM5oWv26gC8,177
|
|
207
209
|
castor_extractor/visualization/qlik/assets.py,sha256=cG3Cqrj2s4inAqfW6dOaxRape2RPiCeGSYjKsRJRLLo,1657
|
|
208
210
|
castor_extractor/visualization/qlik/client/__init__.py,sha256=5O5N9Jrt3d99agFEJ28lKWs2KkDaXK-lZ07IUtLj56M,130
|
|
@@ -282,6 +284,15 @@ castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=-V3To
|
|
|
282
284
|
castor_extractor/visualization/tableau_revamp/client/rest_fields.py,sha256=gx39X1zMfRVpjmFbgvbgbvtlE0QwxOtk8rZFsIqeGRI,978
|
|
283
285
|
castor_extractor/visualization/tableau_revamp/constants.py,sha256=thS935pJyuZkdciM2EFHbIuTqSFYfB3YGCJYJ_Ls294,55
|
|
284
286
|
castor_extractor/visualization/tableau_revamp/extract.py,sha256=BPy38rFjGG6Nh1eDFeCckE4RHaO-bWW2uhXh7wm8mKk,1368
|
|
287
|
+
castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
|
|
288
|
+
castor_extractor/visualization/thoughtspot/assets.py,sha256=lPRvXk0PKybgLv1AcDVxg-ssf4XLTs0biRqLrqC2TzU,196
|
|
289
|
+
castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
|
|
290
|
+
castor_extractor/visualization/thoughtspot/client/client.py,sha256=EaJ0x87Ci5-XaPF9x7Gko2efuOmvGvVp2ViLkqlmk1I,3698
|
|
291
|
+
castor_extractor/visualization/thoughtspot/client/credentials.py,sha256=fp4YHiZy-dstWiLr5c4kFU9SyPK5rd2nCeh8k5sVRpM,462
|
|
292
|
+
castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=u3FRkmG6j5OIMEeXWZcgRObP8JeC4EutIJEeitNV44c,330
|
|
293
|
+
castor_extractor/visualization/thoughtspot/client/utils.py,sha256=54pC7t4-haWrJNPu4R7ef5dbd4zvMe3aep6bP61MglM,874
|
|
294
|
+
castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256=-5ZaEYpQSrIp1-Sx-ViQOLPlv2LoOajEs2mE5YNi_tU,1887
|
|
295
|
+
castor_extractor/visualization/thoughtspot/extract.py,sha256=hpKUpwnAeu3_fPrtmAt6UhB04U8EKZgL7gJp0H7KZoM,1334
|
|
285
296
|
castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
286
297
|
castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
|
|
287
298
|
castor_extractor/warehouse/abstract/asset.py,sha256=Qs7T2Iw7KHgWVT2aAoBfCQ8tB143cUZY-DRUSkpgvGU,2689
|
|
@@ -364,11 +375,11 @@ castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6Wfw
|
|
|
364
375
|
castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=Pkyh_QT6d4rhTeyiVcqw6O8CRl7NEhk2p7eM5YIn5kg,719
|
|
365
376
|
castor_extractor/warehouse/redshift/query.py,sha256=0C81rkt2cpkWrJIxxwALDyqr-49vlqQM04y_N6wwStc,540
|
|
366
377
|
castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
|
|
367
|
-
castor_extractor/warehouse/salesforce/client.py,sha256
|
|
378
|
+
castor_extractor/warehouse/salesforce/client.py,sha256=-9WHcQwEMrpGRQ9CN-bsRSR2Tnx9d-f_FtV4ntsf71w,3287
|
|
368
379
|
castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
|
|
369
380
|
castor_extractor/warehouse/salesforce/extract.py,sha256=GaxkGWhdksDT_rlT24KX8DMpWnhKlhDMAUvBPGalli0,3454
|
|
370
|
-
castor_extractor/warehouse/salesforce/format.py,sha256=
|
|
371
|
-
castor_extractor/warehouse/salesforce/format_test.py,sha256=
|
|
381
|
+
castor_extractor/warehouse/salesforce/format.py,sha256=DlPD4BQax2RmdDDucw1QbDUTUm2N0CzI7Gc9GymNOYA,3370
|
|
382
|
+
castor_extractor/warehouse/salesforce/format_test.py,sha256=3_OzI0GB3YVEw33ldXCcLG5NwIRZziQaCrAawT4_0g0,3266
|
|
372
383
|
castor_extractor/warehouse/salesforce/pagination.py,sha256=m1S9JRNf6Oe-6dDghYUY5wwTzGzKW5H9pE60PCXMha0,920
|
|
373
384
|
castor_extractor/warehouse/salesforce/soql.py,sha256=XB8ohKwHFfC4Xger7Y84DXLW17IJDye_bZ3FL6DCcOI,1188
|
|
374
385
|
castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
|
|
@@ -402,8 +413,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
402
413
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
403
414
|
castor_extractor/warehouse/sqlserver/query.py,sha256=j_d5-HMnzBouwGfywVZMRSSwbXzPvzDWlFCZmvxcoGQ,539
|
|
404
415
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
405
|
-
castor_extractor-0.20.0.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
406
|
-
castor_extractor-0.20.
|
|
407
|
-
castor_extractor-0.20.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
408
|
-
castor_extractor-0.20.
|
|
409
|
-
castor_extractor-0.20.0.dist-info/RECORD,,
|
|
416
|
+
castor_extractor-0.20.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
417
|
+
castor_extractor-0.20.4.dist-info/METADATA,sha256=YcFx5O-gccq_JevTWl9xfeE5LGf5baiUKHfPrG1QX28,21123
|
|
418
|
+
castor_extractor-0.20.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
419
|
+
castor_extractor-0.20.4.dist-info/entry_points.txt,sha256=IVGy_oM8VjzADMAxzmiNJTYYidTCsI98MpO_mkXjkqE,1573
|
|
420
|
+
castor_extractor-0.20.4.dist-info/RECORD,,
|
|
@@ -18,6 +18,7 @@ castor-extract-sigma=castor_extractor.commands.extract_sigma:main
|
|
|
18
18
|
castor-extract-snowflake=castor_extractor.commands.extract_snowflake:main
|
|
19
19
|
castor-extract-sqlserver=castor_extractor.commands.extract_sqlserver:main
|
|
20
20
|
castor-extract-tableau=castor_extractor.commands.extract_tableau:main
|
|
21
|
+
castor-extract-thoughtspot=castor_extractor.commands.extract_thoughtspot:main
|
|
21
22
|
castor-file-check=castor_extractor.commands.file_check:main
|
|
22
23
|
castor-upload=castor_extractor.commands.upload:main
|
|
23
24
|
|
|
@@ -1,305 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from datetime import date, datetime
|
|
3
|
-
from time import sleep
|
|
4
|
-
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
|
|
5
|
-
|
|
6
|
-
import msal # type: ignore
|
|
7
|
-
import requests
|
|
8
|
-
|
|
9
|
-
from ....utils import at_midnight, format_date, yesterday
|
|
10
|
-
from ..assets import PowerBiAsset
|
|
11
|
-
from .constants import (
|
|
12
|
-
DEFAULT_TIMEOUT_IN_SECS,
|
|
13
|
-
GET,
|
|
14
|
-
POST,
|
|
15
|
-
SCAN_READY,
|
|
16
|
-
Batches,
|
|
17
|
-
Keys,
|
|
18
|
-
QueryParams,
|
|
19
|
-
Urls,
|
|
20
|
-
)
|
|
21
|
-
from .credentials import PowerbiCredentials
|
|
22
|
-
from .utils import batch_size_is_valid_or_assert, datetime_is_recent_or_assert
|
|
23
|
-
|
|
24
|
-
logger = logging.getLogger(__name__)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def _time_filter(day: Optional[date]) -> Tuple[datetime, datetime]:
|
|
28
|
-
target_day = day or yesterday()
|
|
29
|
-
start = at_midnight(target_day)
|
|
30
|
-
end = datetime.combine(target_day, datetime.max.time())
|
|
31
|
-
return start, end
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def _url(
|
|
35
|
-
day: Optional[date],
|
|
36
|
-
continuation_uri: Optional[str],
|
|
37
|
-
) -> str:
|
|
38
|
-
if continuation_uri:
|
|
39
|
-
return continuation_uri
|
|
40
|
-
|
|
41
|
-
url = Urls.ACTIVITY_EVENTS
|
|
42
|
-
start, end = _time_filter(day)
|
|
43
|
-
url += "?$filter=Activity eq 'viewreport'"
|
|
44
|
-
url += f"&startDateTime='{format_date(start)}'"
|
|
45
|
-
url += f"&endDateTime='{format_date(end)}'"
|
|
46
|
-
return url
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class Client:
|
|
50
|
-
"""
|
|
51
|
-
PowerBI rest admin api
|
|
52
|
-
https://learn.microsoft.com/en-us/rest/api/power-bi/admin
|
|
53
|
-
"""
|
|
54
|
-
|
|
55
|
-
def __init__(self, credentials: PowerbiCredentials):
|
|
56
|
-
self.creds = credentials
|
|
57
|
-
client_app = f"{Urls.CLIENT_APP_BASE}{self.creds.tenant_id}"
|
|
58
|
-
self.app = msal.ConfidentialClientApplication(
|
|
59
|
-
client_id=self.creds.client_id,
|
|
60
|
-
authority=client_app,
|
|
61
|
-
client_credential=self.creds.secret,
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
def _access_token(self) -> dict:
|
|
65
|
-
token = self.app.acquire_token_for_client(scopes=self.creds.scopes)
|
|
66
|
-
|
|
67
|
-
if Keys.ACCESS_TOKEN not in token:
|
|
68
|
-
raise ValueError(f"No access token in token response: {token}")
|
|
69
|
-
|
|
70
|
-
return token
|
|
71
|
-
|
|
72
|
-
def _header(self) -> Dict:
|
|
73
|
-
"""Return header used in following rest api call"""
|
|
74
|
-
token = self._access_token()
|
|
75
|
-
return {"Authorization": f"Bearer {token[Keys.ACCESS_TOKEN]}"}
|
|
76
|
-
|
|
77
|
-
def _call(
|
|
78
|
-
self,
|
|
79
|
-
url: str,
|
|
80
|
-
method: str = GET,
|
|
81
|
-
*,
|
|
82
|
-
params: Optional[Dict] = None,
|
|
83
|
-
data: Optional[dict] = None,
|
|
84
|
-
processor: Optional[Callable] = None,
|
|
85
|
-
) -> Any:
|
|
86
|
-
"""
|
|
87
|
-
Make either a get or a post http request.Request, by default
|
|
88
|
-
result.json is returned. Optionally you can provide a processor callback
|
|
89
|
-
to transform the result.
|
|
90
|
-
"""
|
|
91
|
-
logger.debug(f"Calling {method} on {url}")
|
|
92
|
-
result = requests.request(
|
|
93
|
-
method,
|
|
94
|
-
url,
|
|
95
|
-
headers=self._header(),
|
|
96
|
-
params=params,
|
|
97
|
-
data=data,
|
|
98
|
-
)
|
|
99
|
-
result.raise_for_status()
|
|
100
|
-
|
|
101
|
-
if processor:
|
|
102
|
-
return processor(result)
|
|
103
|
-
|
|
104
|
-
return result.json()
|
|
105
|
-
|
|
106
|
-
def _get(
|
|
107
|
-
self,
|
|
108
|
-
url: str,
|
|
109
|
-
*,
|
|
110
|
-
params: Optional[Dict] = None,
|
|
111
|
-
processor: Optional[Callable] = None,
|
|
112
|
-
) -> Any:
|
|
113
|
-
return self._call(url, GET, params=params, processor=processor)
|
|
114
|
-
|
|
115
|
-
def _post(
|
|
116
|
-
self,
|
|
117
|
-
url: str,
|
|
118
|
-
*,
|
|
119
|
-
params: Optional[dict],
|
|
120
|
-
data: Optional[dict],
|
|
121
|
-
processor: Optional[Callable] = None,
|
|
122
|
-
) -> Any:
|
|
123
|
-
return self._call(
|
|
124
|
-
url,
|
|
125
|
-
POST,
|
|
126
|
-
params=params,
|
|
127
|
-
data=data,
|
|
128
|
-
processor=processor,
|
|
129
|
-
)
|
|
130
|
-
|
|
131
|
-
def _workspace_ids(
|
|
132
|
-
self,
|
|
133
|
-
modified_since: Optional[datetime] = None,
|
|
134
|
-
) -> List[str]:
|
|
135
|
-
"""
|
|
136
|
-
Get workspaces ids from powerBI admin API.
|
|
137
|
-
If modified_since, take only workspaces that have been modified since
|
|
138
|
-
|
|
139
|
-
more: https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-get-modified-workspaces
|
|
140
|
-
"""
|
|
141
|
-
|
|
142
|
-
def result_callback(call_result: requests.models.Response) -> List[str]:
|
|
143
|
-
return [x["id"] for x in call_result.json()]
|
|
144
|
-
|
|
145
|
-
params: Dict[str, Union[bool, str]] = {
|
|
146
|
-
Keys.INACTIVE_WORKSPACES: True,
|
|
147
|
-
Keys.PERSONAL_WORKSPACES: True,
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
if modified_since:
|
|
151
|
-
datetime_is_recent_or_assert(modified_since)
|
|
152
|
-
modified_since_iso = f"{modified_since.isoformat()}0Z"
|
|
153
|
-
params[Keys.MODIFIED_SINCE] = modified_since_iso
|
|
154
|
-
|
|
155
|
-
result = self._get(
|
|
156
|
-
Urls.WORKSPACE_IDS,
|
|
157
|
-
params=params,
|
|
158
|
-
processor=result_callback,
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
return result
|
|
162
|
-
|
|
163
|
-
def _create_scan(self, workspaces_ids: List[str]) -> int:
|
|
164
|
-
batch_size_is_valid_or_assert(workspaces_ids)
|
|
165
|
-
request_body = {"workspaces": workspaces_ids}
|
|
166
|
-
params = QueryParams.METADATA_SCAN
|
|
167
|
-
scan_id = self._post(
|
|
168
|
-
Urls.METADATA_POST,
|
|
169
|
-
params=params,
|
|
170
|
-
data=request_body,
|
|
171
|
-
)
|
|
172
|
-
return scan_id[Keys.ID]
|
|
173
|
-
|
|
174
|
-
def _wait_for_scan_result(self, scan_id: int) -> bool:
|
|
175
|
-
url = f"{Urls.METADATA_WAIT}/{scan_id}"
|
|
176
|
-
waiting_seconds = 0
|
|
177
|
-
sleep_seconds = 1
|
|
178
|
-
while True:
|
|
179
|
-
result = self._get(url, processor=lambda x: x)
|
|
180
|
-
if result.status_code != 200:
|
|
181
|
-
return False
|
|
182
|
-
if result.json()[Keys.STATUS] == SCAN_READY:
|
|
183
|
-
logger.info(f"scan {scan_id} ready")
|
|
184
|
-
return True
|
|
185
|
-
if waiting_seconds >= DEFAULT_TIMEOUT_IN_SECS:
|
|
186
|
-
break
|
|
187
|
-
waiting_seconds += sleep_seconds
|
|
188
|
-
logger.info(
|
|
189
|
-
f"Waiting {sleep_seconds} sec for scan {scan_id} to be ready…",
|
|
190
|
-
)
|
|
191
|
-
sleep(sleep_seconds)
|
|
192
|
-
return False
|
|
193
|
-
|
|
194
|
-
def _get_scan(self, scan_id: int) -> List[dict]:
|
|
195
|
-
url = f"{Urls.METADATA_GET}/{scan_id}"
|
|
196
|
-
return self._get(url)[Keys.WORKSPACES]
|
|
197
|
-
|
|
198
|
-
def _activity_events(
|
|
199
|
-
self,
|
|
200
|
-
*,
|
|
201
|
-
day: Optional[date] = None,
|
|
202
|
-
continuation_uri: Optional[str] = None,
|
|
203
|
-
) -> List[Dict]:
|
|
204
|
-
"""
|
|
205
|
-
Returns a list of activity events for the organization.
|
|
206
|
-
https://learn.microsoft.com/en-us/power-bi/admin/service-admin-auditing#activityevents-rest-api
|
|
207
|
-
- when no day is specified, fallback is yesterday
|
|
208
|
-
- continuation_uri allows to fetch paginated data (internal usage)
|
|
209
|
-
"""
|
|
210
|
-
url = _url(day, continuation_uri)
|
|
211
|
-
answer = self._get(url)
|
|
212
|
-
activity_events = answer[Keys.ACTIVITY_EVENT_ENTITIES]
|
|
213
|
-
is_last = answer[Keys.LAST_RESULT_SET]
|
|
214
|
-
assert isinstance(is_last, bool)
|
|
215
|
-
if is_last:
|
|
216
|
-
return activity_events
|
|
217
|
-
|
|
218
|
-
# there are more data to fetch
|
|
219
|
-
# https://learn.microsoft.com/en-us/rest/api/power-bi/admin/get-activity-events#get-the-next-set-of-audit-activity-events-by-sending-the-continuation-token-to-the-api-example
|
|
220
|
-
continuation_uri = answer[Keys.CONTINUATION_URI]
|
|
221
|
-
rest = self._activity_events(continuation_uri=continuation_uri)
|
|
222
|
-
activity_events.extend(rest)
|
|
223
|
-
return activity_events
|
|
224
|
-
|
|
225
|
-
def _datasets(self) -> List[Dict]:
|
|
226
|
-
"""
|
|
227
|
-
Returns a list of datasets for the organization.
|
|
228
|
-
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasets-as-admin
|
|
229
|
-
"""
|
|
230
|
-
return self._get(Urls.DATASETS)[Keys.VALUE]
|
|
231
|
-
|
|
232
|
-
def _reports(self) -> List[Dict]:
|
|
233
|
-
"""
|
|
234
|
-
Returns a list of reports for the organization.
|
|
235
|
-
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/reports-get-reports-as-admin
|
|
236
|
-
"""
|
|
237
|
-
reports = self._get(Urls.REPORTS)[Keys.VALUE]
|
|
238
|
-
for report in reports:
|
|
239
|
-
report_id = report.get("id")
|
|
240
|
-
try:
|
|
241
|
-
url = Urls.REPORTS + f"/{report_id}/pages"
|
|
242
|
-
pages = self._get(url)[Keys.VALUE]
|
|
243
|
-
report["pages"] = pages
|
|
244
|
-
except (requests.HTTPError, requests.exceptions.Timeout) as e:
|
|
245
|
-
logger.debug(e)
|
|
246
|
-
continue
|
|
247
|
-
return reports
|
|
248
|
-
|
|
249
|
-
def _dashboards(self) -> List[Dict]:
|
|
250
|
-
"""
|
|
251
|
-
Returns a list of dashboards for the organization.
|
|
252
|
-
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/dashboards-get-dashboards-as-admin
|
|
253
|
-
"""
|
|
254
|
-
return self._get(Urls.DASHBOARD)[Keys.VALUE]
|
|
255
|
-
|
|
256
|
-
def _metadata(
|
|
257
|
-
self,
|
|
258
|
-
modified_since: Optional[datetime] = None,
|
|
259
|
-
) -> Iterator[List[Dict]]:
|
|
260
|
-
"""
|
|
261
|
-
Fetch metadata by workspace.
|
|
262
|
-
https://learn.microsoft.com/en-us/power-bi/enterprise/service-admin-metadata-scanning
|
|
263
|
-
"""
|
|
264
|
-
ids = self._workspace_ids(modified_since)
|
|
265
|
-
|
|
266
|
-
for ix in range(0, len(ids), Batches.METADATA):
|
|
267
|
-
batch_ids = [w_id for w_id in ids[ix : ix + Batches.METADATA]]
|
|
268
|
-
scan_id = self._create_scan(batch_ids)
|
|
269
|
-
self._wait_for_scan_result(scan_id)
|
|
270
|
-
yield self._get_scan(scan_id)
|
|
271
|
-
|
|
272
|
-
def test_connection(self) -> None:
|
|
273
|
-
"""Use credentials & verify requesting the API doesn't raise an error"""
|
|
274
|
-
self._header()
|
|
275
|
-
|
|
276
|
-
def fetch(
|
|
277
|
-
self,
|
|
278
|
-
asset: PowerBiAsset,
|
|
279
|
-
*,
|
|
280
|
-
modified_since: Optional[datetime] = None,
|
|
281
|
-
day: Optional[date] = None,
|
|
282
|
-
) -> List[Dict]:
|
|
283
|
-
"""
|
|
284
|
-
Given a PowerBi asset, returns the corresponding data using the
|
|
285
|
-
appropriate client.
|
|
286
|
-
"""
|
|
287
|
-
logger.info(f"Starting extraction of {asset}")
|
|
288
|
-
asset = PowerBiAsset(asset)
|
|
289
|
-
|
|
290
|
-
if asset == PowerBiAsset.ACTIVITY_EVENTS:
|
|
291
|
-
return self._activity_events(day=day)
|
|
292
|
-
|
|
293
|
-
if asset == PowerBiAsset.DATASETS:
|
|
294
|
-
return self._datasets()
|
|
295
|
-
|
|
296
|
-
if asset == PowerBiAsset.DASHBOARDS:
|
|
297
|
-
return self._dashboards()
|
|
298
|
-
|
|
299
|
-
if asset == PowerBiAsset.REPORTS:
|
|
300
|
-
return self._reports()
|
|
301
|
-
|
|
302
|
-
assert asset == PowerBiAsset.METADATA
|
|
303
|
-
return [
|
|
304
|
-
item for batch in self._metadata(modified_since) for item in batch
|
|
305
|
-
]
|