castor-extractor: castor_extractor-0.20.0-py3-none-any.whl → castor_extractor-0.20.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

Files changed (38)
  1. CHANGELOG.md +16 -0
  2. castor_extractor/commands/extract_thoughtspot.py +18 -0
  3. castor_extractor/utils/client/api/client.py +7 -2
  4. castor_extractor/utils/client/api/safe_request.py +6 -3
  5. castor_extractor/visualization/looker/api/constants.py +0 -4
  6. castor_extractor/visualization/powerbi/__init__.py +1 -1
  7. castor_extractor/visualization/powerbi/assets.py +7 -1
  8. castor_extractor/visualization/powerbi/client/__init__.py +2 -3
  9. castor_extractor/visualization/powerbi/client/authentication.py +27 -0
  10. castor_extractor/visualization/powerbi/client/client.py +207 -0
  11. castor_extractor/visualization/powerbi/client/client_test.py +173 -0
  12. castor_extractor/visualization/powerbi/client/constants.py +0 -67
  13. castor_extractor/visualization/powerbi/client/credentials.py +3 -4
  14. castor_extractor/visualization/powerbi/client/credentials_test.py +3 -4
  15. castor_extractor/visualization/powerbi/client/endpoints.py +65 -0
  16. castor_extractor/visualization/powerbi/client/pagination.py +32 -0
  17. castor_extractor/visualization/powerbi/extract.py +14 -9
  18. castor_extractor/visualization/thoughtspot/__init__.py +3 -0
  19. castor_extractor/visualization/thoughtspot/assets.py +9 -0
  20. castor_extractor/visualization/thoughtspot/client/__init__.py +2 -0
  21. castor_extractor/visualization/thoughtspot/client/client.py +120 -0
  22. castor_extractor/visualization/thoughtspot/client/credentials.py +18 -0
  23. castor_extractor/visualization/thoughtspot/client/endpoints.py +12 -0
  24. castor_extractor/visualization/thoughtspot/client/utils.py +25 -0
  25. castor_extractor/visualization/thoughtspot/client/utils_test.py +57 -0
  26. castor_extractor/visualization/thoughtspot/extract.py +49 -0
  27. castor_extractor/warehouse/salesforce/client.py +1 -1
  28. castor_extractor/warehouse/salesforce/format.py +40 -30
  29. castor_extractor/warehouse/salesforce/format_test.py +61 -24
  30. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/METADATA +17 -1
  31. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/RECORD +34 -23
  32. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/entry_points.txt +1 -0
  33. castor_extractor/visualization/powerbi/client/rest.py +0 -305
  34. castor_extractor/visualization/powerbi/client/rest_test.py +0 -290
  35. castor_extractor/visualization/powerbi/client/utils.py +0 -19
  36. castor_extractor/visualization/powerbi/client/utils_test.py +0 -24
  37. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/LICENCE +0 -0
  38. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.4.dist-info}/WHEEL +0 -0
@@ -1,15 +1,15 @@
1
- from typing import Dict, Tuple
1
+ from typing import Dict, List, Tuple
2
2
 
3
3
  from .format import (
4
- SCHEMA_NAME,
4
+ _HAS_DUPLICATE_KEY,
5
5
  SalesforceFormatter,
6
- _by_label,
6
+ _detect_duplicates,
7
7
  _field_description,
8
- _merge_label_and_api_name,
8
+ _name,
9
9
  )
10
10
 
11
11
 
12
- def _example_sobjects() -> Tuple[Dict[str, str], ...]:
12
+ def _tables_sobjects() -> Tuple[Dict[str, str], ...]:
13
13
  """Returns 4 sobjects with 2 sharing the same label"""
14
14
  a = {"Label": "a", "QualifiedApiName": "a_one"}
15
15
  b = {"Label": "b", "QualifiedApiName": "b"}
@@ -18,6 +18,16 @@ def _example_sobjects() -> Tuple[Dict[str, str], ...]:
18
18
  return a, b, c, a_prime
19
19
 
20
20
 
21
+ def _columns_sobjects() -> Dict[str, List[dict]]:
22
+ a = {"Label": "First Name", "QualifiedApiName": "owner_name"}
23
+ b = {"Label": "First Name", "QualifiedApiName": "editor_name"}
24
+ c = {"Label": "Foo Bar", "QualifiedApiName": "foo_bar"}
25
+ return {
26
+ "table_1": [a, b],
27
+ "table_2": [c],
28
+ }
29
+
30
+
21
31
  def test__field_description():
22
32
  field = {}
23
33
  assert _field_description(field) == ""
@@ -48,32 +58,59 @@ def test__field_description():
48
58
  assert description == expected
49
59
 
50
60
 
51
- def test__merge_label_and_api_name():
52
- sobject = {"Label": "foo", "QualifiedApiName": "bar"}
53
- payload = _merge_label_and_api_name(sobject)
54
- expected_name = "foo (bar)"
55
- assert payload == {
56
- "id": expected_name,
57
- "api_name": "bar",
58
- "label": "foo",
59
- "schema_id": SCHEMA_NAME,
60
- "table_name": expected_name,
61
- "description": None,
62
- "tags": [],
63
- "type": "TABLE",
61
+ def test__name():
62
+ unique_sobject = {
63
+ "Label": "First Name",
64
+ "QualifiedApiName": "first_name",
65
+ _HAS_DUPLICATE_KEY: False,
64
66
  }
67
+ assert _name(unique_sobject) == "First Name"
65
68
 
69
+ duplicate_sobject = {
70
+ "Label": "First Name",
71
+ "QualifiedApiName": "first_name",
72
+ _HAS_DUPLICATE_KEY: True,
73
+ }
74
+ assert _name(duplicate_sobject) == "First Name (first_name)"
75
+
76
+ empty_label_sobject = {
77
+ "Label": "",
78
+ "QualifiedApiName": "empty_label",
79
+ _HAS_DUPLICATE_KEY: False,
80
+ }
81
+ assert _name(empty_label_sobject) == "empty_label"
66
82
 
67
- def test__by_label():
68
- a, b, c, a_prime = _example_sobjects()
69
- sobjects = [a, b, c, a_prime]
70
- by_label = _by_label(sobjects)
71
- assert by_label == {"a": [a, a_prime], "b": [b], "c": [c]}
83
+
84
+ def test__detect_duplicates():
85
+ objects = [
86
+ {"Label": "Foo"},
87
+ {"Label": "Bar"},
88
+ {"Label": "Foo"},
89
+ ]
90
+
91
+ objects = _detect_duplicates(objects)
92
+ assert objects == [
93
+ {"Label": "Foo", _HAS_DUPLICATE_KEY: True},
94
+ {"Label": "Bar", _HAS_DUPLICATE_KEY: False},
95
+ {"Label": "Foo", _HAS_DUPLICATE_KEY: True},
96
+ ]
72
97
 
73
98
 
74
99
  def test_salesforce_formatter_tables():
75
- sobjects = [*_example_sobjects()]
100
+ sobjects = [*_tables_sobjects()]
76
101
  tables = SalesforceFormatter.tables(sobjects)
77
102
  expected_names = {"a (a_one)", "a (a_two)", "b", "c"}
78
103
  payload_names = {t["table_name"] for t in tables}
79
104
  assert payload_names == expected_names
105
+
106
+
107
+ def test_salesforce_formatter_columns():
108
+ sobjects = _columns_sobjects()
109
+ columns = SalesforceFormatter.columns(sobjects)
110
+ column_ids = {c["id"] for c in columns}
111
+ expected_column_ids = {
112
+ "table_1.First Name (owner_name)",
113
+ "table_1.First Name (editor_name)",
114
+ "table_2.Foo Bar",
115
+ }
116
+ assert column_ids == expected_column_ids
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.20.0
3
+ Version: 0.20.4
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -208,6 +208,22 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
208
208
 
209
209
  # Changelog
210
210
 
211
+ ## 0.20.4 - 2024-10-09
212
+
213
+ * Salesforce warehouse: `Labels` instead of `api_names` for columns
214
+
215
+ ## 0.20.3 - 2024-10-03
216
+
217
+ * Looker: no longer extract `as_html` dashboard elements
218
+
219
+ ## 0.20.2 - 2024-09-24
220
+
221
+ * Thoughtspot: Adding connector
222
+
223
+ ## 0.20.1 - 2024-09-23
224
+
225
+ * Power BI: Improved client based on APIClient
226
+
211
227
  ## 0.20.0 - 2024-09-23
212
228
 
213
229
  * Switch to Tableau revamped connector
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=_iTzy8VrdNYmYKWXfGfPMIlSenr4M7LCvoE1K0H96co,13601
1
+ CHANGELOG.md,sha256=CzVaQbFAS2hlZE2ak7DTYHWBNjMaC59e8UK7Q9p10tw,13905
2
2
  Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
3
3
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
4
4
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -24,6 +24,7 @@ castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_
24
24
  castor_extractor/commands/extract_snowflake.py,sha256=vYiruxRoo--GeMemOGsSE1w9kcKTh_y4E165HtMVzkM,1982
25
25
  castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
26
26
  castor_extractor/commands/extract_tableau.py,sha256=VUb_1Y85EzfF1f9OaCQQt8kFYBdp0u31Mw1Wm2fkxWs,1221
27
+ castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
27
28
  castor_extractor/commands/file_check.py,sha256=VSD84kpQKf7b0wJOhUgkJQ9n4mK3v52sjMWL7wkNYa0,2667
28
29
  castor_extractor/commands/upload.py,sha256=WLDI3zDmK2CjtbxiMWX2mZGjxx8DozfCw6tLE3CAMcE,1833
29
30
  castor_extractor/file_checker/__init__.py,sha256=OSt6YLhUT42U_Cp3LCLHMVruwDkksL75Ij13X2UPnVk,119
@@ -73,11 +74,11 @@ castor_extractor/utils/client/abstract.py,sha256=aA5Qcb9TwWDSMq8WpXbGkOB20hehwX2
73
74
  castor_extractor/utils/client/api/__init__.py,sha256=vlG7WXznYgLTn3XyMGsyUkgRkup8FbKM14EXJ8mv-b0,264
74
75
  castor_extractor/utils/client/api/auth.py,sha256=QDLM5h1zGibLaKyATxLF0gycg01SE92G-Y69f_YBClc,1896
75
76
  castor_extractor/utils/client/api/auth_test.py,sha256=NoZYsz7bcCyWBZdMF1TaOuK-s1j09DhTRyM4GSUW_YQ,1311
76
- castor_extractor/utils/client/api/client.py,sha256=0E5GG5Yxk-J5B11YdeIcccYk7jAfuQoWJIz5ljMGYUE,4275
77
+ castor_extractor/utils/client/api/client.py,sha256=_XkOMbkit0_YYJ2tY1rYrEuoPTUP826CJPnI9OLgmuQ,4434
77
78
  castor_extractor/utils/client/api/client_test.py,sha256=FM3ZxsLLfMOBn44cXX6FIgnA31-5TTNIyp9D4LBwtXE,1222
78
79
  castor_extractor/utils/client/api/pagination.py,sha256=Efg3P9ct_U5rtgXijMGV05oQxSzjldEopECWjIFWerM,2439
79
80
  castor_extractor/utils/client/api/pagination_test.py,sha256=jCOgXFXrH-jrCxe2dfk80ZksJF-EtmpJPU11BGabsqk,1385
80
- castor_extractor/utils/client/api/safe_request.py,sha256=SeBteAK8KhBjXldIdyUpkZphf9ktjzbvBM49AXrvD0g,1686
81
+ castor_extractor/utils/client/api/safe_request.py,sha256=dh69Uv9LMUGKDnxDnBYEYMEdTWq2GHLWD3ZwXtQP3So,1787
81
82
  castor_extractor/utils/client/api/safe_request_test.py,sha256=LqS5FBxs6lLLcTkcgxIoLb6OinxShHXR5y4CWZpwmwg,2005
82
83
  castor_extractor/utils/client/api/utils.py,sha256=jr8CWf48cIp8QP1P7oZ1zg9WaGlDO3mqCWgQKdEcpyc,238
83
84
  castor_extractor/utils/client/api/utils_test.py,sha256=a5aL-pCwa74C8Ne7OT169Bjp8WPDV5Fl8MxNxAllHJg,514
@@ -144,7 +145,7 @@ castor_extractor/visualization/looker/__init__.py,sha256=mem0020YeP4_5zDnqRXOW3g
144
145
  castor_extractor/visualization/looker/api/__init__.py,sha256=HDLsLy3kDWHIplAzLl1_u_bvGlgY6cuplf8myJTdfTg,169
145
146
  castor_extractor/visualization/looker/api/client.py,sha256=xp7wV59UsrXQXrkR-vB9YH78aPu2rAfwNRFxXegJluo,11283
146
147
  castor_extractor/visualization/looker/api/client_test.py,sha256=a80DpBOorFumXEA3D_qHuRZJqR51-DUtbz65XSLuSHc,1977
147
- castor_extractor/visualization/looker/api/constants.py,sha256=pZpq09tqcGi2Vh8orXxn9eil8ewfPUOLKfVuqgV2W-A,4126
148
+ castor_extractor/visualization/looker/api/constants.py,sha256=wnpEtZNbvTKEsLRCSdDUOru2Y6uIFyrBt1e5Hp9T7J4,4021
148
149
  castor_extractor/visualization/looker/api/credentials.py,sha256=dnEMW-d-g4N_JJhkXd-CJcnKLA1zBNMbgnELL_-guNI,972
149
150
  castor_extractor/visualization/looker/api/extraction_parameters.py,sha256=53tMtYHxlgALWuKr9w-lOE0xHIqKLvIQHl4w5wufjbU,1284
150
151
  castor_extractor/visualization/looker/api/sdk.py,sha256=KEhVCpQ__K9yTxSoIDG5y1FFuAhmeHo65pvxh7g90Ts,1600
@@ -192,17 +193,18 @@ castor_extractor/visualization/mode/client/constants.py,sha256=_Si5AF6VnpoSfnNNg
192
193
  castor_extractor/visualization/mode/client/credentials.py,sha256=ptIpCCpoNt06yYaWQgl3Xu78_jVMoqsqWAGqQXVFZlo,606
193
194
  castor_extractor/visualization/mode/errors.py,sha256=SKpFT2AiLOuWx2VRLyO7jbAiKcGDFXXrsebpNEKtr0E,1495
194
195
  castor_extractor/visualization/mode/extract.py,sha256=g_X7k8L8MldFPbuwOrnyNMF3BEH1r-IAAgNmi3KLF-U,1623
195
- castor_extractor/visualization/powerbi/__init__.py,sha256=VylJP6kw4yd2zGj31V-U9UXdhnPS9MK2Fz7Sd9KTkKI,119
196
- castor_extractor/visualization/powerbi/assets.py,sha256=SASUjxtoOMag3NAlZfhpCy0sLap7WfENEMaEZuBrw6o,801
197
- castor_extractor/visualization/powerbi/client/__init__.py,sha256=ewaEKS_shQlBbCpf-12J-bx3aUIpxbFZRJUL4eNOOno,97
198
- castor_extractor/visualization/powerbi/client/constants.py,sha256=gW2OdZwPvK_VjzNSZpFYKXW4dyYlSu9h4bIhYnIpXB4,2352
199
- castor_extractor/visualization/powerbi/client/credentials.py,sha256=z2p3KbA095LFV8XCGV6xLdHuPgOy5sQwCIAjkPgl5n8,772
200
- castor_extractor/visualization/powerbi/client/credentials_test.py,sha256=Zp7upzTUoGSArg-ujP0d9dGx31OMubIuA3S8X5Flv9Y,1016
201
- castor_extractor/visualization/powerbi/client/rest.py,sha256=YvDa-jnYVtONibRq6ifw26iphLaHZfGLz5qEmvPNnXI,9782
202
- castor_extractor/visualization/powerbi/client/rest_test.py,sha256=WMd8042r0nbUZECCVVC9JpJuNica2qlQBBbM8QuYfcQ,8528
203
- castor_extractor/visualization/powerbi/client/utils.py,sha256=0RcoWcKOdvIGH4f3lYDvufmiMo4tr_ABFlITSrvXjTs,541
204
- castor_extractor/visualization/powerbi/client/utils_test.py,sha256=ULHL2JLrcv0xjW2r7QF_ce2OaGeeSzajkMDywJ8ZdVA,719
205
- castor_extractor/visualization/powerbi/extract.py,sha256=OPF2QxP44iruQWARmpAx1HSDj7NLadPApIWVl1yRVZI,1101
196
+ castor_extractor/visualization/powerbi/__init__.py,sha256=AJnmfdmm2mGaInWJkUfZxRqrI7dBkTUSebpow05g5zo,135
197
+ castor_extractor/visualization/powerbi/assets.py,sha256=4VtYLgY81yQ3WzOEDipyK4zkS4xrIY9wjJBO1CeLpb4,932
198
+ castor_extractor/visualization/powerbi/client/__init__.py,sha256=8Bzhd9Z0ebVg2gDchXCOPa80Yqlq_9oCjbGi8u1M6J0,93
199
+ castor_extractor/visualization/powerbi/client/authentication.py,sha256=fz0v9qxeADwA1jiS9UzAQN5mA5kmZT53onlcWon2RGw,892
200
+ castor_extractor/visualization/powerbi/client/client.py,sha256=BDV0m00baYJLK4AyAP-TJ7rp2iEOk0ZsrPSIoSviEHI,7188
201
+ castor_extractor/visualization/powerbi/client/client_test.py,sha256=6NtpcKZCxBWyJO3phnVgE70Wmunb6tWsdXikkReJ02E,5539
202
+ castor_extractor/visualization/powerbi/client/constants.py,sha256=88R_aGachNNUZh6OSH2fkDwZtY4KTStzKm_g7HNCqqo,387
203
+ castor_extractor/visualization/powerbi/client/credentials.py,sha256=ueJ6AySVuigwGxIeQ7tGD2nh0UV1PnhKIkCCqFDvGBw,801
204
+ castor_extractor/visualization/powerbi/client/credentials_test.py,sha256=TzFqxsWVQ3sXR_n0bJsexK9Uz7ceXCEPVqDGWTJzW60,993
205
+ castor_extractor/visualization/powerbi/client/endpoints.py,sha256=WG1JRJ4FznUOQaUCWzjBqRnHNxQKv1-b2ZAnQVwQEVg,2058
206
+ castor_extractor/visualization/powerbi/client/pagination.py,sha256=OZMjoDQPRGMoWd9QcKKrPh3aErJR20SHlrTqY_siLkk,755
207
+ castor_extractor/visualization/powerbi/extract.py,sha256=0YCNSeTqcXSBbrl9g5dJUv8oMm7r-NT8tcfB-IdgPo8,1333
206
208
  castor_extractor/visualization/qlik/__init__.py,sha256=u6lIfm_WOykBwt6SlaB7C0Dtx37XBliUbM5oWv26gC8,177
207
209
  castor_extractor/visualization/qlik/assets.py,sha256=cG3Cqrj2s4inAqfW6dOaxRape2RPiCeGSYjKsRJRLLo,1657
208
210
  castor_extractor/visualization/qlik/client/__init__.py,sha256=5O5N9Jrt3d99agFEJ28lKWs2KkDaXK-lZ07IUtLj56M,130
@@ -282,6 +284,15 @@ castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=-V3To
282
284
  castor_extractor/visualization/tableau_revamp/client/rest_fields.py,sha256=gx39X1zMfRVpjmFbgvbgbvtlE0QwxOtk8rZFsIqeGRI,978
283
285
  castor_extractor/visualization/tableau_revamp/constants.py,sha256=thS935pJyuZkdciM2EFHbIuTqSFYfB3YGCJYJ_Ls294,55
284
286
  castor_extractor/visualization/tableau_revamp/extract.py,sha256=BPy38rFjGG6Nh1eDFeCckE4RHaO-bWW2uhXh7wm8mKk,1368
287
+ castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
288
+ castor_extractor/visualization/thoughtspot/assets.py,sha256=lPRvXk0PKybgLv1AcDVxg-ssf4XLTs0biRqLrqC2TzU,196
289
+ castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
290
+ castor_extractor/visualization/thoughtspot/client/client.py,sha256=EaJ0x87Ci5-XaPF9x7Gko2efuOmvGvVp2ViLkqlmk1I,3698
291
+ castor_extractor/visualization/thoughtspot/client/credentials.py,sha256=fp4YHiZy-dstWiLr5c4kFU9SyPK5rd2nCeh8k5sVRpM,462
292
+ castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=u3FRkmG6j5OIMEeXWZcgRObP8JeC4EutIJEeitNV44c,330
293
+ castor_extractor/visualization/thoughtspot/client/utils.py,sha256=54pC7t4-haWrJNPu4R7ef5dbd4zvMe3aep6bP61MglM,874
294
+ castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256=-5ZaEYpQSrIp1-Sx-ViQOLPlv2LoOajEs2mE5YNi_tU,1887
295
+ castor_extractor/visualization/thoughtspot/extract.py,sha256=hpKUpwnAeu3_fPrtmAt6UhB04U8EKZgL7gJp0H7KZoM,1334
285
296
  castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
286
297
  castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
287
298
  castor_extractor/warehouse/abstract/asset.py,sha256=Qs7T2Iw7KHgWVT2aAoBfCQ8tB143cUZY-DRUSkpgvGU,2689
@@ -364,11 +375,11 @@ castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6Wfw
364
375
  castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=Pkyh_QT6d4rhTeyiVcqw6O8CRl7NEhk2p7eM5YIn5kg,719
365
376
  castor_extractor/warehouse/redshift/query.py,sha256=0C81rkt2cpkWrJIxxwALDyqr-49vlqQM04y_N6wwStc,540
366
377
  castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
367
- castor_extractor/warehouse/salesforce/client.py,sha256=NbbXTi_eX0ge815FgsiWh4uUnvZMOAl9_mXA_e172_0,3281
378
+ castor_extractor/warehouse/salesforce/client.py,sha256=-9WHcQwEMrpGRQ9CN-bsRSR2Tnx9d-f_FtV4ntsf71w,3287
368
379
  castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
369
380
  castor_extractor/warehouse/salesforce/extract.py,sha256=GaxkGWhdksDT_rlT24KX8DMpWnhKlhDMAUvBPGalli0,3454
370
- castor_extractor/warehouse/salesforce/format.py,sha256=eiPM_4i_m3FEg_2jkMYlhaRBg3gTvV-9xQuk8ghJZiM,3289
371
- castor_extractor/warehouse/salesforce/format_test.py,sha256=6xDtCxNqvLo5JeHCtXyAun62WMzfVaVsvvMGXXfGgmA,2254
381
+ castor_extractor/warehouse/salesforce/format.py,sha256=DlPD4BQax2RmdDDucw1QbDUTUm2N0CzI7Gc9GymNOYA,3370
382
+ castor_extractor/warehouse/salesforce/format_test.py,sha256=3_OzI0GB3YVEw33ldXCcLG5NwIRZziQaCrAawT4_0g0,3266
372
383
  castor_extractor/warehouse/salesforce/pagination.py,sha256=m1S9JRNf6Oe-6dDghYUY5wwTzGzKW5H9pE60PCXMha0,920
373
384
  castor_extractor/warehouse/salesforce/soql.py,sha256=XB8ohKwHFfC4Xger7Y84DXLW17IJDye_bZ3FL6DCcOI,1188
374
385
  castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
@@ -402,8 +413,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
402
413
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
403
414
  castor_extractor/warehouse/sqlserver/query.py,sha256=j_d5-HMnzBouwGfywVZMRSSwbXzPvzDWlFCZmvxcoGQ,539
404
415
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
405
- castor_extractor-0.20.0.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
406
- castor_extractor-0.20.0.dist-info/METADATA,sha256=u6XUIwUCR1lLswRR_mMI9sm9zXrqIbE05MGVnAPLOBA,20819
407
- castor_extractor-0.20.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
408
- castor_extractor-0.20.0.dist-info/entry_points.txt,sha256=X_pDYOmhUUMbiAD9h2GZveuGdT8UgL38KJqP44xkvqo,1495
409
- castor_extractor-0.20.0.dist-info/RECORD,,
416
+ castor_extractor-0.20.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
417
+ castor_extractor-0.20.4.dist-info/METADATA,sha256=YcFx5O-gccq_JevTWl9xfeE5LGf5baiUKHfPrG1QX28,21123
418
+ castor_extractor-0.20.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
419
+ castor_extractor-0.20.4.dist-info/entry_points.txt,sha256=IVGy_oM8VjzADMAxzmiNJTYYidTCsI98MpO_mkXjkqE,1573
420
+ castor_extractor-0.20.4.dist-info/RECORD,,
@@ -18,6 +18,7 @@ castor-extract-sigma=castor_extractor.commands.extract_sigma:main
18
18
  castor-extract-snowflake=castor_extractor.commands.extract_snowflake:main
19
19
  castor-extract-sqlserver=castor_extractor.commands.extract_sqlserver:main
20
20
  castor-extract-tableau=castor_extractor.commands.extract_tableau:main
21
+ castor-extract-thoughtspot=castor_extractor.commands.extract_thoughtspot:main
21
22
  castor-file-check=castor_extractor.commands.file_check:main
22
23
  castor-upload=castor_extractor.commands.upload:main
23
24
 
@@ -1,305 +0,0 @@
1
- import logging
2
- from datetime import date, datetime
3
- from time import sleep
4
- from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
5
-
6
- import msal # type: ignore
7
- import requests
8
-
9
- from ....utils import at_midnight, format_date, yesterday
10
- from ..assets import PowerBiAsset
11
- from .constants import (
12
- DEFAULT_TIMEOUT_IN_SECS,
13
- GET,
14
- POST,
15
- SCAN_READY,
16
- Batches,
17
- Keys,
18
- QueryParams,
19
- Urls,
20
- )
21
- from .credentials import PowerbiCredentials
22
- from .utils import batch_size_is_valid_or_assert, datetime_is_recent_or_assert
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
-
27
- def _time_filter(day: Optional[date]) -> Tuple[datetime, datetime]:
28
- target_day = day or yesterday()
29
- start = at_midnight(target_day)
30
- end = datetime.combine(target_day, datetime.max.time())
31
- return start, end
32
-
33
-
34
- def _url(
35
- day: Optional[date],
36
- continuation_uri: Optional[str],
37
- ) -> str:
38
- if continuation_uri:
39
- return continuation_uri
40
-
41
- url = Urls.ACTIVITY_EVENTS
42
- start, end = _time_filter(day)
43
- url += "?$filter=Activity eq 'viewreport'"
44
- url += f"&startDateTime='{format_date(start)}'"
45
- url += f"&endDateTime='{format_date(end)}'"
46
- return url
47
-
48
-
49
- class Client:
50
- """
51
- PowerBI rest admin api
52
- https://learn.microsoft.com/en-us/rest/api/power-bi/admin
53
- """
54
-
55
- def __init__(self, credentials: PowerbiCredentials):
56
- self.creds = credentials
57
- client_app = f"{Urls.CLIENT_APP_BASE}{self.creds.tenant_id}"
58
- self.app = msal.ConfidentialClientApplication(
59
- client_id=self.creds.client_id,
60
- authority=client_app,
61
- client_credential=self.creds.secret,
62
- )
63
-
64
- def _access_token(self) -> dict:
65
- token = self.app.acquire_token_for_client(scopes=self.creds.scopes)
66
-
67
- if Keys.ACCESS_TOKEN not in token:
68
- raise ValueError(f"No access token in token response: {token}")
69
-
70
- return token
71
-
72
- def _header(self) -> Dict:
73
- """Return header used in following rest api call"""
74
- token = self._access_token()
75
- return {"Authorization": f"Bearer {token[Keys.ACCESS_TOKEN]}"}
76
-
77
- def _call(
78
- self,
79
- url: str,
80
- method: str = GET,
81
- *,
82
- params: Optional[Dict] = None,
83
- data: Optional[dict] = None,
84
- processor: Optional[Callable] = None,
85
- ) -> Any:
86
- """
87
- Make either a get or a post http request.Request, by default
88
- result.json is returned. Optionally you can provide a processor callback
89
- to transform the result.
90
- """
91
- logger.debug(f"Calling {method} on {url}")
92
- result = requests.request(
93
- method,
94
- url,
95
- headers=self._header(),
96
- params=params,
97
- data=data,
98
- )
99
- result.raise_for_status()
100
-
101
- if processor:
102
- return processor(result)
103
-
104
- return result.json()
105
-
106
- def _get(
107
- self,
108
- url: str,
109
- *,
110
- params: Optional[Dict] = None,
111
- processor: Optional[Callable] = None,
112
- ) -> Any:
113
- return self._call(url, GET, params=params, processor=processor)
114
-
115
- def _post(
116
- self,
117
- url: str,
118
- *,
119
- params: Optional[dict],
120
- data: Optional[dict],
121
- processor: Optional[Callable] = None,
122
- ) -> Any:
123
- return self._call(
124
- url,
125
- POST,
126
- params=params,
127
- data=data,
128
- processor=processor,
129
- )
130
-
131
- def _workspace_ids(
132
- self,
133
- modified_since: Optional[datetime] = None,
134
- ) -> List[str]:
135
- """
136
- Get workspaces ids from powerBI admin API.
137
- If modified_since, take only workspaces that have been modified since
138
-
139
- more: https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-get-modified-workspaces
140
- """
141
-
142
- def result_callback(call_result: requests.models.Response) -> List[str]:
143
- return [x["id"] for x in call_result.json()]
144
-
145
- params: Dict[str, Union[bool, str]] = {
146
- Keys.INACTIVE_WORKSPACES: True,
147
- Keys.PERSONAL_WORKSPACES: True,
148
- }
149
-
150
- if modified_since:
151
- datetime_is_recent_or_assert(modified_since)
152
- modified_since_iso = f"{modified_since.isoformat()}0Z"
153
- params[Keys.MODIFIED_SINCE] = modified_since_iso
154
-
155
- result = self._get(
156
- Urls.WORKSPACE_IDS,
157
- params=params,
158
- processor=result_callback,
159
- )
160
-
161
- return result
162
-
163
- def _create_scan(self, workspaces_ids: List[str]) -> int:
164
- batch_size_is_valid_or_assert(workspaces_ids)
165
- request_body = {"workspaces": workspaces_ids}
166
- params = QueryParams.METADATA_SCAN
167
- scan_id = self._post(
168
- Urls.METADATA_POST,
169
- params=params,
170
- data=request_body,
171
- )
172
- return scan_id[Keys.ID]
173
-
174
- def _wait_for_scan_result(self, scan_id: int) -> bool:
175
- url = f"{Urls.METADATA_WAIT}/{scan_id}"
176
- waiting_seconds = 0
177
- sleep_seconds = 1
178
- while True:
179
- result = self._get(url, processor=lambda x: x)
180
- if result.status_code != 200:
181
- return False
182
- if result.json()[Keys.STATUS] == SCAN_READY:
183
- logger.info(f"scan {scan_id} ready")
184
- return True
185
- if waiting_seconds >= DEFAULT_TIMEOUT_IN_SECS:
186
- break
187
- waiting_seconds += sleep_seconds
188
- logger.info(
189
- f"Waiting {sleep_seconds} sec for scan {scan_id} to be ready…",
190
- )
191
- sleep(sleep_seconds)
192
- return False
193
-
194
- def _get_scan(self, scan_id: int) -> List[dict]:
195
- url = f"{Urls.METADATA_GET}/{scan_id}"
196
- return self._get(url)[Keys.WORKSPACES]
197
-
198
- def _activity_events(
199
- self,
200
- *,
201
- day: Optional[date] = None,
202
- continuation_uri: Optional[str] = None,
203
- ) -> List[Dict]:
204
- """
205
- Returns a list of activity events for the organization.
206
- https://learn.microsoft.com/en-us/power-bi/admin/service-admin-auditing#activityevents-rest-api
207
- - when no day is specified, fallback is yesterday
208
- - continuation_uri allows to fetch paginated data (internal usage)
209
- """
210
- url = _url(day, continuation_uri)
211
- answer = self._get(url)
212
- activity_events = answer[Keys.ACTIVITY_EVENT_ENTITIES]
213
- is_last = answer[Keys.LAST_RESULT_SET]
214
- assert isinstance(is_last, bool)
215
- if is_last:
216
- return activity_events
217
-
218
- # there are more data to fetch
219
- # https://learn.microsoft.com/en-us/rest/api/power-bi/admin/get-activity-events#get-the-next-set-of-audit-activity-events-by-sending-the-continuation-token-to-the-api-example
220
- continuation_uri = answer[Keys.CONTINUATION_URI]
221
- rest = self._activity_events(continuation_uri=continuation_uri)
222
- activity_events.extend(rest)
223
- return activity_events
224
-
225
- def _datasets(self) -> List[Dict]:
226
- """
227
- Returns a list of datasets for the organization.
228
- https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasets-as-admin
229
- """
230
- return self._get(Urls.DATASETS)[Keys.VALUE]
231
-
232
- def _reports(self) -> List[Dict]:
233
- """
234
- Returns a list of reports for the organization.
235
- https://learn.microsoft.com/en-us/rest/api/power-bi/admin/reports-get-reports-as-admin
236
- """
237
- reports = self._get(Urls.REPORTS)[Keys.VALUE]
238
- for report in reports:
239
- report_id = report.get("id")
240
- try:
241
- url = Urls.REPORTS + f"/{report_id}/pages"
242
- pages = self._get(url)[Keys.VALUE]
243
- report["pages"] = pages
244
- except (requests.HTTPError, requests.exceptions.Timeout) as e:
245
- logger.debug(e)
246
- continue
247
- return reports
248
-
249
- def _dashboards(self) -> List[Dict]:
250
- """
251
- Returns a list of dashboards for the organization.
252
- https://learn.microsoft.com/en-us/rest/api/power-bi/admin/dashboards-get-dashboards-as-admin
253
- """
254
- return self._get(Urls.DASHBOARD)[Keys.VALUE]
255
-
256
- def _metadata(
257
- self,
258
- modified_since: Optional[datetime] = None,
259
- ) -> Iterator[List[Dict]]:
260
- """
261
- Fetch metadata by workspace.
262
- https://learn.microsoft.com/en-us/power-bi/enterprise/service-admin-metadata-scanning
263
- """
264
- ids = self._workspace_ids(modified_since)
265
-
266
- for ix in range(0, len(ids), Batches.METADATA):
267
- batch_ids = [w_id for w_id in ids[ix : ix + Batches.METADATA]]
268
- scan_id = self._create_scan(batch_ids)
269
- self._wait_for_scan_result(scan_id)
270
- yield self._get_scan(scan_id)
271
-
272
- def test_connection(self) -> None:
273
- """Use credentials & verify requesting the API doesn't raise an error"""
274
- self._header()
275
-
276
- def fetch(
277
- self,
278
- asset: PowerBiAsset,
279
- *,
280
- modified_since: Optional[datetime] = None,
281
- day: Optional[date] = None,
282
- ) -> List[Dict]:
283
- """
284
- Given a PowerBi asset, returns the corresponding data using the
285
- appropriate client.
286
- """
287
- logger.info(f"Starting extraction of {asset}")
288
- asset = PowerBiAsset(asset)
289
-
290
- if asset == PowerBiAsset.ACTIVITY_EVENTS:
291
- return self._activity_events(day=day)
292
-
293
- if asset == PowerBiAsset.DATASETS:
294
- return self._datasets()
295
-
296
- if asset == PowerBiAsset.DASHBOARDS:
297
- return self._dashboards()
298
-
299
- if asset == PowerBiAsset.REPORTS:
300
- return self._reports()
301
-
302
- assert asset == PowerBiAsset.METADATA
303
- return [
304
- item for batch in self._metadata(modified_since) for item in batch
305
- ]