castor-extractor 0.20.0__py3-none-any.whl → 0.20.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic; see the registry's advisory page for more details.

Files changed (43)
  1. CHANGELOG.md +20 -0
  2. castor_extractor/commands/extract_redshift.py +6 -0
  3. castor_extractor/commands/extract_thoughtspot.py +18 -0
  4. castor_extractor/utils/client/api/client.py +7 -2
  5. castor_extractor/utils/client/api/safe_request.py +6 -3
  6. castor_extractor/visualization/looker/api/constants.py +0 -4
  7. castor_extractor/visualization/powerbi/__init__.py +1 -1
  8. castor_extractor/visualization/powerbi/assets.py +7 -1
  9. castor_extractor/visualization/powerbi/client/__init__.py +2 -3
  10. castor_extractor/visualization/powerbi/client/authentication.py +27 -0
  11. castor_extractor/visualization/powerbi/client/client.py +207 -0
  12. castor_extractor/visualization/powerbi/client/client_test.py +173 -0
  13. castor_extractor/visualization/powerbi/client/constants.py +0 -67
  14. castor_extractor/visualization/powerbi/client/credentials.py +3 -4
  15. castor_extractor/visualization/powerbi/client/credentials_test.py +3 -4
  16. castor_extractor/visualization/powerbi/client/endpoints.py +65 -0
  17. castor_extractor/visualization/powerbi/client/pagination.py +32 -0
  18. castor_extractor/visualization/powerbi/extract.py +14 -9
  19. castor_extractor/visualization/thoughtspot/__init__.py +3 -0
  20. castor_extractor/visualization/thoughtspot/assets.py +9 -0
  21. castor_extractor/visualization/thoughtspot/client/__init__.py +2 -0
  22. castor_extractor/visualization/thoughtspot/client/client.py +120 -0
  23. castor_extractor/visualization/thoughtspot/client/credentials.py +18 -0
  24. castor_extractor/visualization/thoughtspot/client/endpoints.py +12 -0
  25. castor_extractor/visualization/thoughtspot/client/utils.py +25 -0
  26. castor_extractor/visualization/thoughtspot/client/utils_test.py +57 -0
  27. castor_extractor/visualization/thoughtspot/extract.py +49 -0
  28. castor_extractor/warehouse/redshift/extract.py +10 -1
  29. castor_extractor/warehouse/redshift/extract_test.py +26 -0
  30. castor_extractor/warehouse/redshift/queries/query_serverless.sql +69 -0
  31. castor_extractor/warehouse/redshift/query.py +12 -1
  32. castor_extractor/warehouse/salesforce/client.py +1 -1
  33. castor_extractor/warehouse/salesforce/format.py +40 -30
  34. castor_extractor/warehouse/salesforce/format_test.py +61 -24
  35. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.5.dist-info}/METADATA +21 -1
  36. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.5.dist-info}/RECORD +39 -26
  37. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.5.dist-info}/entry_points.txt +1 -0
  38. castor_extractor/visualization/powerbi/client/rest.py +0 -305
  39. castor_extractor/visualization/powerbi/client/rest_test.py +0 -290
  40. castor_extractor/visualization/powerbi/client/utils.py +0 -19
  41. castor_extractor/visualization/powerbi/client/utils_test.py +0 -24
  42. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.5.dist-info}/LICENCE +0 -0
  43. {castor_extractor-0.20.0.dist-info → castor_extractor-0.20.5.dist-info}/WHEEL +0 -0
CHANGELOG.md CHANGED
@@ -1,6 +1,26 @@
1
1
 
2
2
  # Changelog
3
3
 
4
+ ## 0.20.5 - 2024-10-09
5
+
6
+ * Redshift: enable extraction from a Redshift Serverless instance
7
+
8
+ ## 0.20.4 - 2024-10-09
9
+
10
+ * Salesforce warehouse: `Labels` instead of `api_names` for columns
11
+
12
+ ## 0.20.3 - 2024-10-03
13
+
14
+ * Looker: no longer extract `as_html` dashboard elements
15
+
16
+ ## 0.20.2 - 2024-09-24
17
+
18
+ * Thoughtspot: Adding connector
19
+
20
+ ## 0.20.1 - 2024-09-23
21
+
22
+ * Power BI: Improved client based on APIClient
23
+
4
24
  ## 0.20.0 - 2024-09-23
5
25
 
6
26
  * Switch to Tableau revamped connector
@@ -23,6 +23,11 @@ def main():
23
23
  action="store_true",
24
24
  help="Skips files already extracted instead of replacing them",
25
25
  )
26
+ parser.add_argument(
27
+ "--serverless",
28
+ action="store_true",
29
+ help="Enables extraction for Redshift Serverless",
30
+ )
26
31
  parser.set_defaults(skip_existing=False)
27
32
 
28
33
  args = parser.parse_args()
@@ -34,5 +39,6 @@ def main():
34
39
  user=args.user,
35
40
  password=args.password,
36
41
  output_directory=args.output,
42
+ serverless=args.serverless,
37
43
  skip_existing=args.skip_existing,
38
44
  )
@@ -0,0 +1,18 @@
1
+ import logging
2
+ from argparse import ArgumentParser
3
+
4
+ from castor_extractor.utils import parse_filled_arguments # type: ignore
5
+ from castor_extractor.visualization import thoughtspot # type: ignore
6
+
7
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
8
+
9
+
10
+ def main():
11
+ parser = ArgumentParser()
12
+
13
+ parser.add_argument("-b", "--base_url", help="base url")
14
+ parser.add_argument("-u", "--username", help="username")
15
+ parser.add_argument("-p", "--password", help="password")
16
+ parser.add_argument("-o", "--output", help="Directory to write to")
17
+
18
+ thoughtspot.extract_all(**parse_filled_arguments(parser))
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from http import HTTPStatus
3
- from typing import Dict, Literal, Optional, Tuple
3
+ from typing import Callable, Dict, Literal, Optional, Tuple
4
4
 
5
5
  import requests
6
6
  from requests import Response
@@ -137,12 +137,15 @@ class APIClient:
137
137
  endpoint: str,
138
138
  *,
139
139
  headers: Headers = None,
140
+ params: Optional[dict] = None,
140
141
  data: Optional[dict] = None,
141
142
  pagination_params: Optional[dict] = None,
143
+ handler: Optional[Callable] = None,
142
144
  ):
143
145
  response = self._call(
144
146
  method="POST",
145
147
  endpoint=endpoint,
148
+ params=params,
146
149
  data=data,
147
150
  pagination_params=pagination_params,
148
151
  headers=headers,
@@ -150,4 +153,6 @@ class APIClient:
150
153
  if response.status_code == HTTPStatus.UNAUTHORIZED:
151
154
  self._auth.refresh_token()
152
155
 
153
- return handle_response(response, safe_mode=self._safe_mode)
156
+ return handle_response(
157
+ response, safe_mode=self._safe_mode, handler=handler
158
+ )
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Any, List, Optional, Tuple, Union
2
+ from typing import Any, Callable, List, Optional, Tuple, Union
3
3
 
4
4
  from requests import HTTPError, Response
5
5
 
@@ -41,9 +41,10 @@ class RequestSafeMode:
41
41
  def handle_response(
42
42
  response: Response,
43
43
  safe_mode: Optional[RequestSafeMode] = None,
44
+ handler: Optional[Callable] = None,
44
45
  ) -> Any:
45
46
  """
46
- Util to handle a HTTP Response based on the response status code and the
47
+ Util to handle HTTP Response based on the response status code and the
47
48
  safe mode used
48
49
  """
49
50
  safe_mode = safe_mode if safe_mode else RequestSafeMode()
@@ -56,4 +57,6 @@ def handle_response(
56
57
  logger.error(f"Safe mode : skip request with error {e}")
57
58
  logger.debug(e, exc_info=True)
58
59
  return {}
59
- return response.json()
60
+ if not handler:
61
+ return response.json()
62
+ return handler(response)
@@ -32,14 +32,10 @@ DASHBOARD_FILTERS = (
32
32
  DASHBOARD_ELEMENTS = (
33
33
  "id",
34
34
  "body_text",
35
- "body_text_as_html",
36
35
  "note_text",
37
- "note_text_as_html",
38
36
  "subtitle_text",
39
- "subtitle_text_as_html",
40
37
  "title",
41
38
  "title_text",
42
- "title_text_as_html",
43
39
  "title_hidden",
44
40
  "type",
45
41
  {
@@ -1,3 +1,3 @@
1
1
  from .assets import PowerBiAsset
2
- from .client import Client, PowerbiCredentials, Urls
2
+ from .client import DEFAULT_SCOPE, PowerbiClient, PowerbiCredentials
3
3
  from .extract import extract_all
@@ -11,16 +11,19 @@ class PowerBiAsset(ExternalAsset):
11
11
  DATASETS = "datasets"
12
12
  DATASET_FIELDS = "dataset_fields"
13
13
  METADATA = "metadata"
14
+ PAGES = "pages"
14
15
  REPORTS = "reports"
15
16
  TABLES = "tables"
17
+ TILES = "tiles"
16
18
  USERS = "users"
17
19
 
18
20
  @classproperty
19
21
  def optional(cls) -> Set["PowerBiAsset"]:
20
22
  return {
21
- PowerBiAsset.DASHBOARDS,
22
23
  PowerBiAsset.DATASET_FIELDS,
24
+ PowerBiAsset.PAGES,
23
25
  PowerBiAsset.TABLES,
26
+ PowerBiAsset.TILES,
24
27
  PowerBiAsset.USERS,
25
28
  }
26
29
 
@@ -30,5 +33,8 @@ class PowerBiAsset(ExternalAsset):
30
33
  METADATA_ASSETS = (
31
34
  PowerBiAsset.DATASET_FIELDS,
32
35
  PowerBiAsset.TABLES,
36
+ PowerBiAsset.TILES,
33
37
  PowerBiAsset.USERS,
34
38
  )
39
+
40
+ REPORTS_ASSETS = (PowerBiAsset.PAGES,)
@@ -1,3 +1,2 @@
1
- from .constants import Urls
2
- from .credentials import PowerbiCredentials
3
- from .rest import Client
1
+ from .client import PowerbiClient
2
+ from .credentials import DEFAULT_SCOPE, PowerbiCredentials
@@ -0,0 +1,27 @@
1
+ import msal # type: ignore
2
+
3
+ from ....utils import BearerAuth
4
+ from .constants import Keys
5
+ from .credentials import PowerbiCredentials
6
+ from .endpoints import PowerBiEndpointFactory
7
+
8
+
9
+ class PowerBiBearerAuth(BearerAuth):
10
+ def __init__(self, credentials: PowerbiCredentials):
11
+ self.credentials = credentials
12
+ authority = PowerBiEndpointFactory.authority(self.credentials.tenant_id)
13
+ self.app = msal.ConfidentialClientApplication(
14
+ client_id=self.credentials.client_id,
15
+ authority=authority,
16
+ client_credential=self.credentials.secret,
17
+ )
18
+
19
+ def fetch_token(self):
20
+ token = self.app.acquire_token_for_client(
21
+ scopes=self.credentials.scopes
22
+ )
23
+
24
+ if Keys.ACCESS_TOKEN not in token:
25
+ raise ValueError(f"No access token in token response: {token}")
26
+
27
+ return token[Keys.ACCESS_TOKEN]
@@ -0,0 +1,207 @@
1
+ import logging
2
+ from datetime import date
3
+ from functools import partial
4
+ from time import sleep
5
+ from typing import Dict, Iterator, List, Optional, Union
6
+
7
+ import requests
8
+ from requests import HTTPError
9
+
10
+ from ....utils import (
11
+ APIClient,
12
+ fetch_all_pages,
13
+ )
14
+ from ..assets import PowerBiAsset
15
+ from .authentication import PowerBiBearerAuth
16
+ from .constants import Keys
17
+ from .credentials import PowerbiCredentials
18
+ from .endpoints import PowerBiEndpointFactory
19
+ from .pagination import PowerBiPagination
20
+
21
+ POWERBI_DEFAULT_TIMEOUT_S = 30
22
+ # The route we use to fetch workspaces info can retrieve a maximum of
23
+ # 100 workspaces per call
24
+ # More: https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-post-workspace-info#request-body
25
+ METADATA_BATCH_SIZE = 100
26
+ POWERBI_SCAN_STATUS_DONE = "Succeeded"
27
+ POWERBI_SCAN_SLEEP_S = 1
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ class PowerbiClient(APIClient):
33
+ def __init__(
34
+ self,
35
+ credentials: PowerbiCredentials,
36
+ ):
37
+ auth = PowerBiBearerAuth(credentials=credentials)
38
+ super().__init__(
39
+ auth=auth,
40
+ timeout=POWERBI_DEFAULT_TIMEOUT_S,
41
+ )
42
+
43
+ def _activity_events(self, day: Optional[date] = None) -> Iterator[Dict]:
44
+ """
45
+ Returns a list of activity events for the organization.
46
+ https://learn.microsoft.com/en-us/power-bi/admin/service-admin-auditing#activityevents-rest-api
47
+ - when no day is specified, fallback is yesterday
48
+ """
49
+ request = partial(
50
+ self._get,
51
+ endpoint=PowerBiEndpointFactory.activity_events(day),
52
+ )
53
+ yield from fetch_all_pages(request, PowerBiPagination)
54
+
55
+ def _datasets(self) -> Iterator[Dict]:
56
+ """
57
+ Returns a list of datasets for the organization.
58
+ https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasets-as-admin
59
+ """
60
+ yield from self._get(PowerBiEndpointFactory.datasets())[Keys.VALUE]
61
+
62
+ def _dashboards(self) -> Iterator[Dict]:
63
+ """
64
+ Returns a list of dashboards for the organization.
65
+ https://learn.microsoft.com/en-us/rest/api/power-bi/admin/dashboards-get-dashboards-as-admin
66
+ """
67
+ yield from self._get(PowerBiEndpointFactory.dashboards())[Keys.VALUE]
68
+
69
+ def _reports(self) -> Iterator[Dict]:
70
+ """
71
+ Returns a list of reports for the organization.
72
+ https://learn.microsoft.com/en-us/rest/api/power-bi/admin/reports-get-reports-as-admin
73
+ """
74
+ reports_endpoint = PowerBiEndpointFactory.reports()
75
+ reports = self._get(reports_endpoint)[Keys.VALUE]
76
+
77
+ for report in reports:
78
+ report_id = report.get(Keys.ID)
79
+
80
+ try:
81
+ pages_endpoint = PowerBiEndpointFactory.pages(report_id)
82
+ pages = self._get(pages_endpoint)[Keys.VALUE]
83
+ report["pages"] = pages
84
+ except (requests.HTTPError, requests.exceptions.Timeout) as e:
85
+ logger.debug(e)
86
+ continue
87
+
88
+ return reports
89
+
90
+ def _workspace_ids(self) -> List[str]:
91
+ """
92
+ Get workspaces ids from powerBI admin API.
93
+ more: https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-get-modified-workspaces
94
+ """
95
+ params: Dict[str, Union[bool, str]] = {
96
+ Keys.INACTIVE_WORKSPACES: True,
97
+ Keys.PERSONAL_WORKSPACES: True,
98
+ }
99
+
100
+ response = self._get(
101
+ PowerBiEndpointFactory.workspace_ids(),
102
+ params=params,
103
+ )
104
+
105
+ return [x[Keys.ID] for x in response]
106
+
107
+ def _get_scan_result(self, scan_id: int) -> Iterator[Dict]:
108
+ endpoint = PowerBiEndpointFactory.metadata_scan_result(scan_id)
109
+ yield from self._get(endpoint)[Keys.WORKSPACES]
110
+
111
+ def _wait_for_scan_result(self, scan_id: int) -> bool:
112
+ """
113
+ Periodically checks the status of the metadata scan until the results
114
+ are ready.
115
+ """
116
+ endpoint = PowerBiEndpointFactory.metadata_scan_status(scan_id)
117
+ total_waiting_time_s = 0
118
+
119
+ while total_waiting_time_s < POWERBI_DEFAULT_TIMEOUT_S:
120
+ try:
121
+ result = self._get(endpoint)
122
+ except HTTPError as e:
123
+ logger.error(f"Scan {scan_id} failed. Error: {e}")
124
+ return False
125
+
126
+ if result[Keys.STATUS] == POWERBI_SCAN_STATUS_DONE:
127
+ logger.info(f"scan {scan_id} ready")
128
+ return True
129
+
130
+ total_waiting_time_s += POWERBI_SCAN_SLEEP_S
131
+ logger.info(
132
+ f"Waiting {POWERBI_SCAN_SLEEP_S} sec for scan {scan_id} to be ready…",
133
+ )
134
+ sleep(POWERBI_SCAN_SLEEP_S)
135
+
136
+ logger.warning(f"Scan {scan_id} timed out")
137
+ return False
138
+
139
+ def _create_scan(self, workspaces_ids: List[str]) -> int:
140
+ """
141
+ Tells the Power BI API to start an asynchronous metadata scan.
142
+ Returns the scan's ID.
143
+ https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-post-workspace-info
144
+ """
145
+ params = {
146
+ "datasetExpressions": True,
147
+ "datasetSchema": True,
148
+ "datasourceDetails": True,
149
+ "getArtifactUsers": True,
150
+ "lineage": True,
151
+ }
152
+ request_body = {"workspaces": workspaces_ids}
153
+ scan_id = self._post(
154
+ PowerBiEndpointFactory.metadata_create_scan(),
155
+ params=params,
156
+ data=request_body,
157
+ )
158
+ return scan_id[Keys.ID]
159
+
160
+ def _metadata(self) -> Iterator[Dict]:
161
+ """
162
+ Fetch metadata by workspace. The metadata scanning is asynchronous and
163
+ requires the following steps:
164
+ - create the asynchronous scan
165
+ - periodically check the scan status to know when it's finished
166
+ - get the actual scan results
167
+ https://learn.microsoft.com/en-us/power-bi/enterprise/service-admin-metadata-scanning
168
+ """
169
+ ids = self._workspace_ids()
170
+
171
+ for index in range(0, len(ids), METADATA_BATCH_SIZE):
172
+ batch_ids = ids[index : index + METADATA_BATCH_SIZE]
173
+ scan_id = self._create_scan(batch_ids)
174
+ self._wait_for_scan_result(scan_id)
175
+ yield from self._get_scan_result(scan_id)
176
+
177
+ def test_connection(self) -> None:
178
+ """Use credentials & verify requesting the API doesn't raise an error"""
179
+ self._auth.refresh_token()
180
+
181
+ def fetch(
182
+ self,
183
+ asset: PowerBiAsset,
184
+ *,
185
+ day: Optional[date] = None,
186
+ ) -> Iterator[Dict]:
187
+ """
188
+ Given a PowerBi asset, returns the corresponding data using the
189
+ appropriate client.
190
+ """
191
+ if asset == PowerBiAsset.ACTIVITY_EVENTS:
192
+ yield from self._activity_events(day=day)
193
+
194
+ elif asset == PowerBiAsset.DATASETS:
195
+ yield from self._datasets()
196
+
197
+ elif asset == PowerBiAsset.DASHBOARDS:
198
+ yield from self._dashboards()
199
+
200
+ elif asset == PowerBiAsset.REPORTS:
201
+ yield from self._reports()
202
+
203
+ elif asset == PowerBiAsset.METADATA:
204
+ yield from self._metadata()
205
+
206
+ else:
207
+ raise ValueError(f"This asset {asset} is unknown")
@@ -0,0 +1,173 @@
1
+ from datetime import date
2
+ from unittest.mock import Mock, call, patch
3
+
4
+ import pytest
5
+
6
+ from .authentication import msal
7
+ from .client import PowerbiClient
8
+ from .constants import Keys
9
+ from .credentials import PowerbiCredentials
10
+ from .endpoints import PowerBiEndpointFactory
11
+
12
+ FAKE_TENANT_ID = "IamFake"
13
+ FAKE_CLIENT_ID = "MeTwo"
14
+ FAKE_SECRET = "MeThree"
15
+
16
+
17
+ @pytest.fixture
18
+ def mock_msal():
19
+ with patch.object(msal, "ConfidentialClientApplication") as mock_app:
20
+ mock_app.return_value.acquire_token_for_client.return_value = {
21
+ "access_token": "fake_token"
22
+ }
23
+ yield mock_app
24
+
25
+
26
+ @pytest.fixture
27
+ def power_bi_client(mock_msal):
28
+ creds = PowerbiCredentials(
29
+ tenant_id=FAKE_TENANT_ID,
30
+ client_id=FAKE_CLIENT_ID,
31
+ secret=FAKE_SECRET,
32
+ )
33
+ return PowerbiClient(creds)
34
+
35
+
36
+ def test__access_token(power_bi_client, mock_msal):
37
+ # Valid token scenario
38
+ valid_token = "mock_token"
39
+ mock_response = {"access_token": valid_token}
40
+ returning_valid_token = Mock(return_value=mock_response)
41
+ mock_msal.return_value.acquire_token_for_client = returning_valid_token
42
+
43
+ assert power_bi_client._auth.fetch_token() == valid_token
44
+
45
+ # Invalid token scenario
46
+ invalid_response = {"not_access_token": "666"}
47
+ returning_invalid_token = Mock(return_value=invalid_response)
48
+ mock_msal.return_value.acquire_token_for_client = returning_invalid_token
49
+
50
+ with pytest.raises(ValueError):
51
+ power_bi_client._auth.fetch_token()
52
+
53
+
54
+ def test__datasets(power_bi_client):
55
+ with patch.object(power_bi_client, "_get") as mocked_get:
56
+ mocked_get.return_value = {"value": [{"id": 1, "type": "dataset"}]}
57
+ datasets = list(power_bi_client._datasets())
58
+ mocked_get.assert_called_with(PowerBiEndpointFactory.datasets())
59
+ assert datasets == [{"id": 1, "type": "dataset"}]
60
+
61
+
62
+ def test__dashboards(power_bi_client):
63
+ with patch.object(power_bi_client, "_get") as mocked_get:
64
+ mocked_get.return_value = {"value": [{"id": 1, "type": "dashboard"}]}
65
+ dashboards = list(power_bi_client._dashboards())
66
+ mocked_get.assert_called_with(PowerBiEndpointFactory.dashboards())
67
+ assert dashboards == [{"id": 1, "type": "dashboard"}]
68
+
69
+
70
+ def test__reports(power_bi_client):
71
+ with patch.object(power_bi_client, "_get") as mocked_get:
72
+ mocked_get.side_effect = [
73
+ {"value": [{"id": 1, "type": "report"}]},
74
+ {
75
+ "value": [
76
+ {"name": "page_name", "displayName": "page", "order": 0}
77
+ ]
78
+ },
79
+ ]
80
+ reports = list(power_bi_client._reports())
81
+ calls = [
82
+ call(PowerBiEndpointFactory.reports()),
83
+ call(PowerBiEndpointFactory.pages("1")),
84
+ ]
85
+ mocked_get.assert_has_calls(calls)
86
+ assert reports == [
87
+ {
88
+ "id": 1,
89
+ "type": "report",
90
+ "pages": [
91
+ {"name": "page_name", "displayName": "page", "order": 0}
92
+ ],
93
+ }
94
+ ]
95
+
96
+
97
+ def test__workspace_ids(power_bi_client):
98
+ with patch.object(power_bi_client, "_get") as mocked_get:
99
+ mocked_get.return_value = [{"id": 1000}, {"id": 1001}, {"id": 1003}]
100
+
101
+ ids = power_bi_client._workspace_ids()
102
+ assert ids == [1000, 1001, 1003]
103
+
104
+ params = {
105
+ Keys.INACTIVE_WORKSPACES: True,
106
+ Keys.PERSONAL_WORKSPACES: True,
107
+ }
108
+
109
+ mocked_get.assert_called_with(
110
+ PowerBiEndpointFactory.workspace_ids(),
111
+ params=params,
112
+ )
113
+
114
+
115
+ @patch.object(PowerbiClient, "_get_scan_result")
116
+ @patch.object(PowerbiClient, "_wait_for_scan_result")
117
+ @patch.object(PowerbiClient, "_create_scan")
118
+ @patch.object(PowerbiClient, "_workspace_ids")
119
+ def test__metadata(
120
+ mock_workspace_ids,
121
+ mock_create_scan,
122
+ mock_wait_for_scan,
123
+ mock_get_scan_result,
124
+ power_bi_client,
125
+ ):
126
+ mock_workspace_ids.return_value = list(range(200))
127
+ mock_create_scan.return_value = 314
128
+ mock_wait_for_scan.return_value = True
129
+ mock_get_scan_result.return_value = [{"workspace_id": 1871}]
130
+
131
+ result = list(power_bi_client._metadata())
132
+
133
+ assert result == [{"workspace_id": 1871}, {"workspace_id": 1871}]
134
+
135
+
136
+ def test__activity_events(power_bi_client):
137
+ day = date.today()
138
+ mocked_get_results = [
139
+ {
140
+ Keys.ACTIVITY_EVENT_ENTITIES: ["foo", "bar"],
141
+ Keys.LAST_RESULT_SET: False,
142
+ Keys.CONTINUATION_URI: "https://next-call-1",
143
+ },
144
+ {
145
+ Keys.ACTIVITY_EVENT_ENTITIES: ["baz"],
146
+ Keys.LAST_RESULT_SET: False,
147
+ Keys.CONTINUATION_URI: "https://next-call-2",
148
+ },
149
+ {
150
+ Keys.ACTIVITY_EVENT_ENTITIES: ["biz"],
151
+ Keys.LAST_RESULT_SET: True,
152
+ Keys.CONTINUATION_URI: None,
153
+ },
154
+ ]
155
+
156
+ with patch.object(power_bi_client, "_get") as mocked_get:
157
+ mocked_get.side_effect = mocked_get_results
158
+
159
+ result = list(power_bi_client._activity_events(day=day))
160
+ assert result == ["foo", "bar", "baz", "biz"]
161
+
162
+ expected_calls = [
163
+ call(endpoint=PowerBiEndpointFactory.activity_events(day=day)),
164
+ call(endpoint="https://next-call-1"),
165
+ call(endpoint="https://next-call-2"),
166
+ ]
167
+ mocked_get.assert_has_calls(expected_calls)
168
+
169
+
170
+ def test_test_connection(power_bi_client):
171
+ with patch.object(power_bi_client._auth, "refresh_token") as mock_refresh:
172
+ power_bi_client.test_connection()
173
+ mock_refresh.assert_called_once()
@@ -1,62 +1,3 @@
1
- """
2
- File regrouping all constants used in PowerBi client
3
- """
4
-
5
- DEFAULT_TIMEOUT_IN_SECS = 30
6
- SCAN_READY = "Succeeded"
7
- # ModifiedSince params should not be older than 30 days
8
- RECENT_DAYS = 30
9
-
10
- GET = "GET"
11
- POST = "POST"
12
-
13
-
14
- class Urls:
15
- """PowerBi's urls"""
16
-
17
- CLIENT_APP_BASE = "https://login.microsoftonline.com/"
18
- DEFAULT_SCOPE = "https://analysis.windows.net/powerbi/api/.default"
19
- REST_API_BASE_PATH = "https://api.powerbi.com/v1.0/myorg"
20
-
21
- # PBI rest API Routes
22
- ACTIVITY_EVENTS = f"{REST_API_BASE_PATH}/admin/activityevents"
23
- DASHBOARD = f"{REST_API_BASE_PATH}/admin/dashboards"
24
- DATASETS = f"{REST_API_BASE_PATH}/admin/datasets"
25
- GROUPS = f"{REST_API_BASE_PATH}/admin/groups"
26
- METADATA_GET = f"{REST_API_BASE_PATH}/admin/workspaces/scanResult"
27
- METADATA_POST = f"{REST_API_BASE_PATH}/admin/workspaces/getInfo"
28
- METADATA_WAIT = f"{REST_API_BASE_PATH}/admin/workspaces/scanStatus"
29
- REPORTS = f"{REST_API_BASE_PATH}/admin/reports"
30
- WORKSPACE_IDS = (
31
- "https://api.powerbi.com/v1.0/myorg/admin/workspaces/modified"
32
- )
33
-
34
-
35
- class Batches:
36
- """Batches used within PowerBI api calls"""
37
-
38
- DEFAULT = 100
39
- # The route we use to fetch workspaces info can retrieve a maximum of
40
- # 100 workspaces per call
41
- # More: https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-post-workspace-info#request-body
42
- METADATA = 100
43
-
44
-
45
- class QueryParams:
46
- """
47
- Frequently used PowerBi query params
48
- """
49
-
50
- METADATA_SCAN = {
51
- "datasetExpressions": True,
52
- "datasetSchema": True,
53
- "datasourceDetails": True,
54
- "getArtifactUsers": True,
55
- "lineage": True,
56
- }
57
- ACTIVE_WORKSPACE_FILTER = "state eq 'Active' and type eq 'Workspace'"
58
-
59
-
60
1
  class Keys:
61
2
  ACCESS_TOKEN = "access_token" # noqa: S105
62
3
  ACTIVITY_EVENT_ENTITIES = "activityEventEntities"
@@ -64,15 +5,7 @@ class Keys:
64
5
  ID = "id"
65
6
  INACTIVE_WORKSPACES = "excludeInActiveWorkspaces"
66
7
  LAST_RESULT_SET = "lastResultSet"
67
- MODIFIED_SINCE = "modifiedSince"
68
8
  PERSONAL_WORKSPACES = "excludePersonalWorkspaces"
69
9
  STATUS = "status"
70
10
  VALUE = "value"
71
11
  WORKSPACES = "workspaces"
72
-
73
-
74
- class Assertions:
75
- """Assertion's messages"""
76
-
77
- BATCH_TOO_BIG = f"Can not retrieve more than {Batches.METADATA} at the time"
78
- DATETIME_TOO_OLD = "Date must be within 30 days range"