castor-extractor: castor_extractor-0.24.15-py3-none-any.whl → castor_extractor-0.24.21-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic.

Files changed (36)
  1. CHANGELOG.md +24 -0
  2. castor_extractor/commands/extract_looker_studio.py +6 -0
  3. castor_extractor/commands/extract_powerbi.py +2 -0
  4. castor_extractor/knowledge/confluence/assets.py +2 -0
  5. castor_extractor/knowledge/confluence/client/client.py +57 -3
  6. castor_extractor/knowledge/confluence/client/client_test.py +30 -0
  7. castor_extractor/knowledge/confluence/client/endpoints.py +18 -0
  8. castor_extractor/knowledge/confluence/extract.py +11 -0
  9. castor_extractor/knowledge/confluence/utils.py +12 -0
  10. castor_extractor/knowledge/confluence/utils_test.py +30 -0
  11. castor_extractor/{utils → transformation}/dbt/client_test.py +1 -3
  12. castor_extractor/utils/__init__.py +1 -1
  13. castor_extractor/utils/validation.py +2 -2
  14. castor_extractor/visualization/looker_studio/assets.py +5 -1
  15. castor_extractor/visualization/looker_studio/client/admin_sdk_client.py +1 -2
  16. castor_extractor/visualization/looker_studio/client/credentials.py +21 -0
  17. castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py +2 -2
  18. castor_extractor/visualization/looker_studio/extract.py +13 -2
  19. castor_extractor/visualization/powerbi/__init__.py +7 -1
  20. castor_extractor/visualization/powerbi/client/__init__.py +6 -1
  21. castor_extractor/visualization/powerbi/client/authentication.py +5 -1
  22. castor_extractor/visualization/powerbi/client/client.py +13 -9
  23. castor_extractor/visualization/powerbi/client/client_test.py +12 -7
  24. castor_extractor/visualization/powerbi/client/credentials.py +15 -0
  25. castor_extractor/visualization/powerbi/client/endpoints.py +27 -37
  26. castor_extractor/visualization/strategy/client/client.py +1 -3
  27. {castor_extractor-0.24.15.dist-info → castor_extractor-0.24.21.dist-info}/METADATA +25 -1
  28. {castor_extractor-0.24.15.dist-info → castor_extractor-0.24.21.dist-info}/RECORD +35 -34
  29. castor_extractor/visualization/looker_studio/client/scopes.py +0 -6
  30. /castor_extractor/{utils → transformation}/dbt/__init__.py +0 -0
  31. /castor_extractor/{utils → transformation}/dbt/assets.py +0 -0
  32. /castor_extractor/{utils → transformation}/dbt/client.py +0 -0
  33. /castor_extractor/{utils → transformation}/dbt/credentials.py +0 -0
  34. {castor_extractor-0.24.15.dist-info → castor_extractor-0.24.21.dist-info}/LICENCE +0 -0
  35. {castor_extractor-0.24.15.dist-info → castor_extractor-0.24.21.dist-info}/WHEEL +0 -0
  36. {castor_extractor-0.24.15.dist-info → castor_extractor-0.24.21.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.24.21 - 2025-05-26
4
+
5
+ * Looker Studio: add option to skip the extraction of view activity logs
6
+
7
+ ## 0.24.20 - 2025-05-19
8
+
9
+ * Powerbi: allow custom api base and login url
10
+
11
+ ## 0.24.19 - 2025-05-14
12
+
13
+ * Confluence: extract databases
14
+
15
+ ## 0.24.18 - 2025-05-13
16
+
17
+ * Improve folder organisation for transformation tools
18
+
19
+ ## 0.24.17 - 2025-05-13
20
+
21
+ * Strategy: fix dashboard URL format
22
+
23
+ ## 0.24.16 - 2025-05-12
24
+
25
+ * Confluence: extract folders to complete the page hierarchy
26
+
3
27
  ## 0.24.15 - 2025-05-12
4
28
 
5
29
  * Tableau: Add argument to skip columns extraction
@@ -24,6 +24,12 @@ def main():
24
24
  "This can be the same file path as for Looker Studio."
25
25
  ),
26
26
  )
27
+ parser.add_argument(
28
+ "--skip-view-activity-logs",
29
+ action="store_true",
30
+ default=False,
31
+ help="Skips the extraction of activity logs",
32
+ )
27
33
 
28
34
  parser.add_argument("-o", "--output", help="Directory to write to")
29
35
 
@@ -20,5 +20,7 @@ def main():
20
20
  nargs="*",
21
21
  )
22
22
  parser.add_argument("-o", "--output", help="Directory to write to")
23
+ parser.add_argument("-l", "--login_url", help="Login url (Optional)")
24
+ parser.add_argument("-a", "--api_base", help="REST API base (Optional)")
23
25
 
24
26
  powerbi.extract_all(**parse_filled_arguments(parser))
@@ -4,5 +4,7 @@ from ...types import ExternalAsset
4
4
  class ConfluenceAsset(ExternalAsset):
5
5
  """Confluence assets"""
6
6
 
7
+ DATABASES = "databases"
8
+ FOLDERS = "folders"
7
9
  PAGES = "pages"
8
10
  USERS = "users"
@@ -1,6 +1,7 @@
1
+ import logging
1
2
  from collections.abc import Iterator
2
3
  from functools import partial
3
- from typing import Optional
4
+ from typing import Iterable, Optional
4
5
 
5
6
  from ....utils import (
6
7
  APIClient,
@@ -14,6 +15,8 @@ from .credentials import ConfluenceCredentials
14
15
  from .endpoints import ConfluenceEndpointFactory
15
16
  from .pagination import ConfluencePagination
16
17
 
18
+ logger = logging.getLogger(__name__)
19
+
17
20
  _HEADERS = {
18
21
  "Accept": "application/json",
19
22
  "Content-Type": "application/json",
@@ -46,6 +49,43 @@ class ConfluenceClient(APIClient):
46
49
  self.space_ids_allowed = space_ids_allowed or set()
47
50
  self.space_ids_blocked = space_ids_blocked or set()
48
51
 
52
+ def databases(self, database_ids: Iterable[str]) -> Iterator[dict]:
53
+ """
54
+ Extracts all given databases
55
+ """
56
+ for _id in database_ids:
57
+ database = self._get(
58
+ endpoint=ConfluenceEndpointFactory.database(_id),
59
+ )
60
+ yield database
61
+
62
+ def folders(self, folder_ids: Iterable[str]) -> Iterator[dict]:
63
+ """
64
+ Extracts all given folders and their parent folders.
65
+ """
66
+ candidate_ids = set(folder_ids)
67
+ seen = set()
68
+
69
+ while candidate_ids:
70
+ folder_id = candidate_ids.pop()
71
+ if folder_id in seen:
72
+ continue
73
+
74
+ seen.add(folder_id)
75
+ folder = self._get(
76
+ endpoint=ConfluenceEndpointFactory.folder(folder_id),
77
+ )
78
+ yield folder
79
+
80
+ parent_type = folder.get("parentType")
81
+ if parent_type == "folder":
82
+ folder_id = folder["parentId"]
83
+ if folder_id not in seen:
84
+ candidate_ids.add(folder["parentId"])
85
+
86
+ if not parent_type:
87
+ logger.info(f"folder with unknown parent: {folder_id}")
88
+
49
89
  def pages(self):
50
90
  """Extracts all pages from all relevant Spaces."""
51
91
  for space in self.spaces():
@@ -106,9 +146,23 @@ class ConfluenceClient(APIClient):
106
146
  )
107
147
  yield from fetch_all_pages(request, ConfluencePagination)
108
148
 
109
- def fetch(self, asset: ConfluenceAsset) -> Iterator[dict]:
149
+ def fetch(
150
+ self,
151
+ asset: ConfluenceAsset,
152
+ *,
153
+ folder_ids: Optional[Iterator[str]] = None,
154
+ database_ids: Optional[Iterator[str]] = None,
155
+ ) -> Iterator[dict]:
110
156
  """Returns the needed metadata for the queried asset"""
111
- if asset == ConfluenceAsset.PAGES:
157
+ if asset == ConfluenceAsset.FOLDERS:
158
+ assert folder_ids is not None
159
+ yield from self.folders(folder_ids)
160
+
161
+ elif asset == ConfluenceAsset.DATABASES:
162
+ assert database_ids is not None
163
+ yield from self.databases(database_ids)
164
+
165
+ elif asset == ConfluenceAsset.PAGES:
112
166
  yield from self.pages()
113
167
 
114
168
  elif asset == ConfluenceAsset.USERS:
@@ -3,6 +3,36 @@ from unittest.mock import MagicMock, patch
3
3
  from .client import ConfluenceClient
4
4
 
5
5
 
6
+ def test_ConfluenceClient_folders():
7
+ """
8
+ Folder 1 -> Page A -> Folder 2 -> Folder 3 -> Folder 4 -> Page B
9
+ -> Page C -> Folder 5 -> Page D
10
+
11
+ After extracting the pages, we should have all IDs of folders that are
12
+ immediate parents of pages. We still need to look out for nested folders.
13
+ """
14
+ folder_ids = {"1", "3", "4", "5"}
15
+ mock_responses = {
16
+ "1": {"id": "1", "parentType": None, "parentId": None},
17
+ "2": {"id": "2", "parentType": "page", "parentId": "A"},
18
+ "3": {"id": "3", "parentType": "folder", "parentId": "2"},
19
+ "4": {"id": "4", "parentType": "folder", "parentId": "3"},
20
+ "5": {"id": "5", "parentType": "page", "parentId": "C"},
21
+ }
22
+
23
+ def mock_get(endpoint):
24
+ folder_id = endpoint.split("/")[-1]
25
+ return mock_responses[folder_id]
26
+
27
+ client = ConfluenceClient(credentials=MagicMock())
28
+
29
+ with patch.object(client, "_get", side_effect=mock_get):
30
+ result = list(client.folders(folder_ids))
31
+
32
+ assert len(result) == 5
33
+ assert {folder["id"] for folder in result} == set(mock_responses.keys())
34
+
35
+
6
36
  def test_ConfluenceClient_filtered_spaces_with_allowlist():
7
37
  both_blocked_and_allowed_space_id = "789"
8
38
  archived_space_id = "934"
@@ -5,10 +5,28 @@ class ConfluenceEndpointFactory:
5
5
  """
6
6
 
7
7
  API = "wiki/api/v2/"
8
+ DATABASE = "databases"
9
+ FOLDERS = "folders"
8
10
  PAGES = "pages"
9
11
  SPACES = "spaces"
10
12
  USERS = "users-bulk"
11
13
 
14
+ @classmethod
15
+ def database(cls, database_id: str) -> str:
16
+ """
17
+ Endpoint to fetch a database by id.
18
+ More: https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-database/#api-databases-id-get
19
+ """
20
+ return f"{cls.API}{cls.DATABASE}/{database_id}"
21
+
22
+ @classmethod
23
+ def folder(cls, folder_id: str) -> str:
24
+ """
25
+ Endpoint to fetch a folder by id.
26
+ More: https://developer.atlassian.com/cloud/confluence/rest/v2/api-group-folder/#api-folders-id-get
27
+ """
28
+ return f"{cls.API}{cls.FOLDERS}/{folder_id}"
29
+
12
30
  @classmethod
13
31
  def pages(cls, space_id: str) -> str:
14
32
  """
@@ -13,6 +13,7 @@ from ...utils import (
13
13
  )
14
14
  from .assets import ConfluenceAsset
15
15
  from .client import ConfluenceClient, ConfluenceCredentials
16
+ from .utils import pages_to_database_ids, pages_to_folder_ids
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
@@ -32,6 +33,16 @@ def iterate_all_data(
32
33
  yield ConfluenceAsset.PAGES, pages
33
34
  logger.info(f"Extracted {len(pages)} pages from API")
34
35
 
36
+ folder_ids = pages_to_folder_ids(pages)
37
+ logger.info("Extracting FOLDERS from API")
38
+ folders = list(deep_serialize(client.folders(folder_ids)))
39
+ yield ConfluenceAsset.FOLDERS, folders
40
+
41
+ database_ids = pages_to_database_ids(pages)
42
+ logger.info("Extracting DATABASES from API")
43
+ databases = list(deep_serialize(client.databases(database_ids)))
44
+ yield ConfluenceAsset.DATABASES, databases
45
+
35
46
 
36
47
  def extract_all(**kwargs) -> None:
37
48
  """
@@ -0,0 +1,12 @@
1
+ def pages_to_folder_ids(pages: list[dict]) -> set:
2
+ """Returns all unique folder parents."""
3
+ return {
4
+ page["parentId"] for page in pages if page["parentType"] == "folder"
5
+ }
6
+
7
+
8
+ def pages_to_database_ids(pages: list[dict]) -> set:
9
+ """Returns all unique database parents."""
10
+ return {
11
+ page["parentId"] for page in pages if page["parentType"] == "database"
12
+ }
@@ -0,0 +1,30 @@
1
+ from .utils import pages_to_database_ids, pages_to_folder_ids
2
+
3
+
4
+ def test_pages_to_folder_ids():
5
+ """Test the pages_to_folder_ids function."""
6
+ pages = [
7
+ {"id": "9", "parentId": None, "parentType": None},
8
+ {"id": "8", "parentId": "2", "parentType": "folder"},
9
+ {"id": "7", "parentId": "9", "parentType": "page"},
10
+ {"id": "6", "parentId": "4", "parentType": "folder"},
11
+ {"id": "5", "parentId": "4", "parentType": "folder"},
12
+ ]
13
+ expected = {"2", "4"}
14
+ result = pages_to_folder_ids(pages)
15
+ assert result == expected
16
+
17
+
18
+ def test_pages_to_database_id():
19
+ """Test the pages_to_database_id function."""
20
+ pages = [
21
+ {"id": "1", "parentId": "db1", "parentType": "database"},
22
+ {"id": "2", "parentId": "db2", "parentType": "database"},
23
+ {"id": "3", "parentId": "4", "parentType": "folder"},
24
+ {"id": "4", "parentId": None, "parentType": None},
25
+ {"id": "5", "parentId": "db1", "parentType": "database"},
26
+ {"id": "6", "parentId": "9", "parentType": "page"},
27
+ ]
28
+ expected = {"db1", "db2"}
29
+ result = pages_to_database_ids(pages)
30
+ assert result == expected
@@ -9,9 +9,7 @@ from dateutil.tz import tzutc
9
9
  from .client import ContentType, DbtClient, DbtRun, _account_url # type: ignore
10
10
  from .credentials import DbtCredentials
11
11
 
12
- _DBT_CLIENT_PATH = (
13
- "source.packages.extractor.castor_extractor.utils.dbt.client.DbtClient"
14
- )
12
+ _DBT_CLIENT_PATH = "source.packages.extractor.castor_extractor.transformation.dbt.client.DbtClient"
15
13
  _OLD_DATE = datetime(2023, 7, 10, 12, 6, 23, 109171, tzinfo=tzutc())
16
14
  _OLD_DATE_STR = "2023-07-10 12:06:23.109171+00:00"
17
15
  _RECENT_DATE = datetime(2023, 10, 6, 5, 9, 31, 731991, tzinfo=tzutc())
@@ -52,7 +52,7 @@ from .time import (
52
52
  yesterday,
53
53
  )
54
54
  from .type import Callback, Getter, JsonType, SerializedAsset
55
- from .validation import validate_baseurl
55
+ from .validation import clean_path, validate_baseurl
56
56
  from .write import (
57
57
  get_output_filename,
58
58
  get_summary_filename,
@@ -14,7 +14,7 @@ def _preprocess_url(base_url: str) -> str:
14
14
  return base_url.strip()
15
15
 
16
16
 
17
- def _clean_path(path: str) -> str:
17
+ def clean_path(path: str) -> str:
18
18
  return path.rstrip("/")
19
19
 
20
20
 
@@ -44,7 +44,7 @@ def _urlsplit(base_url: str) -> tuple[str, str, str, str, str, str]:
44
44
  url = urlsplit(base_url)
45
45
 
46
46
  hostname, port = _get_hostname_port(url.netloc)
47
- path = _clean_path(url.path)
47
+ path = clean_path(url.path)
48
48
 
49
49
  return url.scheme, hostname, path, port, url.query, url.fragment
50
50
 
@@ -1,7 +1,11 @@
1
- from ...types import ExternalAsset
1
+ from ...types import ExternalAsset, classproperty
2
2
 
3
3
 
4
4
  class LookerStudioAsset(ExternalAsset):
5
5
  ASSETS = "assets"
6
6
  SOURCE_QUERIES = "source_queries"
7
7
  VIEW_ACTIVITY = "view_activity"
8
+
9
+ @classproperty
10
+ def optional(cls) -> set["LookerStudioAsset"]:
11
+ return {LookerStudioAsset.VIEW_ACTIVITY}
@@ -12,7 +12,6 @@ from ....utils import (
12
12
  )
13
13
  from .credentials import LookerStudioCredentials
14
14
  from .pagination import LookerStudioPagination
15
- from .scopes import SCOPES
16
15
 
17
16
  USER_EMAIL_FIELD = "primaryEmail"
18
17
 
@@ -26,7 +25,7 @@ class AdminSDKClient:
26
25
  def __init__(self, credentials: LookerStudioCredentials):
27
26
  self._credentials = Credentials.from_service_account_info(
28
27
  credentials.model_dump(),
29
- scopes=SCOPES,
28
+ scopes=credentials.scopes,
30
29
  subject=credentials.admin_email, # impersonates an admin
31
30
  )
32
31
  self.directory_api = discovery.build(
@@ -1,5 +1,18 @@
1
+ from typing import Optional
2
+
1
3
  from pydantic import BaseModel, SecretStr, field_serializer
2
4
 
5
+ SCOPES_NO_ACTIVITY: tuple[str, ...] = (
6
+ "https://www.googleapis.com/auth/datastudio",
7
+ "https://www.googleapis.com/auth/userinfo.profile",
8
+ "https://www.googleapis.com/auth/admin.directory.user.readonly",
9
+ )
10
+
11
+ DEFAULT_SCOPES: tuple[str, ...] = (
12
+ *SCOPES_NO_ACTIVITY,
13
+ "https://www.googleapis.com/auth/admin.reports.audit.readonly",
14
+ )
15
+
3
16
 
4
17
  class LookerStudioCredentials(BaseModel):
5
18
  """
@@ -19,6 +32,14 @@ class LookerStudioCredentials(BaseModel):
19
32
  token_uri: str
20
33
  type: str
21
34
 
35
+ has_view_activity_logs: Optional[bool] = True
36
+ scopes: Optional[tuple] = DEFAULT_SCOPES
37
+
38
+ def model_post_init(self, __context):
39
+ """Set scopes based on has_view_activity_logs after initialization"""
40
+ if self.has_view_activity_logs is False:
41
+ self.scopes = SCOPES_NO_ACTIVITY
42
+
22
43
  @field_serializer("private_key")
23
44
  def dump_secret(self, pk):
24
45
  """When using model_dump, show private_key value"""
@@ -16,7 +16,6 @@ from .credentials import LookerStudioCredentials
16
16
  from .endpoints import LookerStudioAPIEndpoint
17
17
  from .enums import LookerStudioAssetType
18
18
  from .pagination import LookerStudioPagination
19
- from .scopes import SCOPES
20
19
 
21
20
 
22
21
  @contextmanager
@@ -47,7 +46,8 @@ class LookerStudioAPIAuth(BearerAuth):
47
46
  that user and make requests on that user's behalf.
48
47
  """
49
48
  self._credentials = Credentials.from_service_account_info(
50
- credentials.model_dump(), scopes=SCOPES
49
+ credentials.model_dump(),
50
+ scopes=credentials.scopes,
51
51
  )
52
52
  if subject:
53
53
  self._credentials = self._credentials.with_subject(subject)
@@ -23,8 +23,14 @@ LOOKER_STUDIO_ADMIN_EMAIL = "CASTOR_LOOKER_STUDIO_ADMIN_EMAIL"
23
23
 
24
24
  def iterate_all_data(
25
25
  client: LookerStudioClient,
26
+ has_view_activity_logs: bool = True,
26
27
  ) -> Iterable[tuple[LookerStudioAsset, Union[list, dict]]]:
27
- for asset in LookerStudioAsset:
28
+ assets_to_extract = LookerStudioAsset.mandatory
29
+
30
+ if has_view_activity_logs:
31
+ assets_to_extract.add(LookerStudioAsset.VIEW_ACTIVITY)
32
+
33
+ for asset in assets_to_extract:
28
34
  logger.info(f"Extracting {asset.name} from API")
29
35
  data = list(deep_serialize(client.fetch(asset)))
30
36
  yield asset, data
@@ -45,6 +51,8 @@ def _credentials(params: dict) -> LookerStudioCredentials:
45
51
  LOOKER_STUDIO_ADMIN_EMAIL
46
52
  )
47
53
  credentials["admin_email"] = admin_email
54
+ has_view_activity_logs = not params["skip_view_activity_logs"]
55
+ credentials["has_view_activity_logs"] = has_view_activity_logs
48
56
  return LookerStudioCredentials(**credentials)
49
57
 
50
58
 
@@ -68,7 +76,10 @@ def extract_all(**kwargs) -> None:
68
76
  the given output_directory.
69
77
  """
70
78
  output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
79
+
71
80
  credentials = _credentials(kwargs)
81
+ has_view_activity_logs = bool(credentials.has_view_activity_logs)
82
+
72
83
  bigquery_credentials = _bigquery_credentials_or_none(kwargs)
73
84
 
74
85
  client = LookerStudioClient(
@@ -77,7 +88,7 @@ def extract_all(**kwargs) -> None:
77
88
  )
78
89
  ts = current_timestamp()
79
90
 
80
- for key, data in iterate_all_data(client):
91
+ for key, data in iterate_all_data(client, has_view_activity_logs):
81
92
  filename = get_output_filename(key.name.lower(), output_directory, ts)
82
93
  write_json(filename, data)
83
94
 
@@ -1,3 +1,9 @@
1
1
  from .assets import PowerBiAsset
2
- from .client import DEFAULT_SCOPE, PowerbiClient, PowerbiCredentials
2
+ from .client import (
3
+ CLIENT_APP_BASE,
4
+ DEFAULT_SCOPE,
5
+ REST_API_BASE_PATH,
6
+ PowerbiClient,
7
+ PowerbiCredentials,
8
+ )
3
9
  from .extract import extract_all
@@ -1,2 +1,7 @@
1
1
  from .client import PowerbiClient
2
- from .credentials import DEFAULT_SCOPE, PowerbiCredentials
2
+ from .credentials import (
3
+ CLIENT_APP_BASE,
4
+ DEFAULT_SCOPE,
5
+ REST_API_BASE_PATH,
6
+ PowerbiCredentials,
7
+ )
@@ -9,7 +9,11 @@ from .endpoints import PowerBiEndpointFactory
9
9
  class PowerBiBearerAuth(BearerAuth):
10
10
  def __init__(self, credentials: PowerbiCredentials):
11
11
  self.credentials = credentials
12
- authority = PowerBiEndpointFactory.authority(self.credentials.tenant_id)
12
+ endpoint_factory = PowerBiEndpointFactory(
13
+ login_url=self.credentials.login_url,
14
+ api_base=self.credentials.api_base,
15
+ )
16
+ authority = endpoint_factory.authority(self.credentials.tenant_id)
13
17
  self.app = msal.ConfidentialClientApplication(
14
18
  client_id=self.credentials.client_id,
15
19
  authority=authority,
@@ -40,6 +40,10 @@ class PowerbiClient(APIClient):
40
40
  auth=auth,
41
41
  timeout=POWERBI_DEFAULT_TIMEOUT_S,
42
42
  )
43
+ self.endpoint_factory = PowerBiEndpointFactory(
44
+ login_url=credentials.login_url,
45
+ api_base=credentials.api_base,
46
+ )
43
47
 
44
48
  def _activity_events(self, day: Optional[date] = None) -> Iterator[dict]:
45
49
  """
@@ -49,7 +53,7 @@ class PowerbiClient(APIClient):
49
53
  """
50
54
  request = partial(
51
55
  self._get,
52
- endpoint=PowerBiEndpointFactory.activity_events(day),
56
+ endpoint=self.endpoint_factory.activity_events(day),
53
57
  )
54
58
  yield from fetch_all_pages(request, PowerBiPagination)
55
59
 
@@ -58,28 +62,28 @@ class PowerbiClient(APIClient):
58
62
  Returns a list of datasets for the organization.
59
63
  https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasets-as-admin
60
64
  """
61
- yield from self._get(PowerBiEndpointFactory.datasets())[Keys.VALUE]
65
+ yield from self._get(self.endpoint_factory.datasets())[Keys.VALUE]
62
66
 
63
67
  def _dashboards(self) -> Iterator[dict]:
64
68
  """
65
69
  Returns a list of dashboards for the organization.
66
70
  https://learn.microsoft.com/en-us/rest/api/power-bi/admin/dashboards-get-dashboards-as-admin
67
71
  """
68
- yield from self._get(PowerBiEndpointFactory.dashboards())[Keys.VALUE]
72
+ yield from self._get(self.endpoint_factory.dashboards())[Keys.VALUE]
69
73
 
70
74
  def _reports(self) -> Iterator[dict]:
71
75
  """
72
76
  Returns a list of reports for the organization.
73
77
  https://learn.microsoft.com/en-us/rest/api/power-bi/admin/reports-get-reports-as-admin
74
78
  """
75
- reports_endpoint = PowerBiEndpointFactory.reports()
79
+ reports_endpoint = self.endpoint_factory.reports()
76
80
  reports = self._get(reports_endpoint)[Keys.VALUE]
77
81
 
78
82
  for report in reports:
79
83
  report_id = report.get(Keys.ID)
80
84
 
81
85
  try:
82
- pages_endpoint = PowerBiEndpointFactory.pages(report_id)
86
+ pages_endpoint = self.endpoint_factory.pages(report_id)
83
87
  pages = self._get(pages_endpoint)[Keys.VALUE]
84
88
  report["pages"] = pages
85
89
  except (requests.HTTPError, requests.exceptions.Timeout) as e:
@@ -99,14 +103,14 @@ class PowerbiClient(APIClient):
99
103
  }
100
104
 
101
105
  response = self._get(
102
- PowerBiEndpointFactory.workspace_ids(),
106
+ self.endpoint_factory.workspace_ids(),
103
107
  params=params,
104
108
  )
105
109
 
106
110
  return [x[Keys.ID] for x in response]
107
111
 
108
112
  def _get_scan_result(self, scan_id: int) -> Iterator[dict]:
109
- endpoint = PowerBiEndpointFactory.metadata_scan_result(scan_id)
113
+ endpoint = self.endpoint_factory.metadata_scan_result(scan_id)
110
114
  yield from self._get(endpoint)[Keys.WORKSPACES]
111
115
 
112
116
  def _wait_for_scan_result(self, scan_id: int) -> bool:
@@ -114,7 +118,7 @@ class PowerbiClient(APIClient):
114
118
  Periodically checks the status of the metadata scan until the results
115
119
  are ready.
116
120
  """
117
- endpoint = PowerBiEndpointFactory.metadata_scan_status(scan_id)
121
+ endpoint = self.endpoint_factory.metadata_scan_status(scan_id)
118
122
  total_waiting_time_s = 0
119
123
 
120
124
  while total_waiting_time_s < POWERBI_DEFAULT_TIMEOUT_S:
@@ -152,7 +156,7 @@ class PowerbiClient(APIClient):
152
156
  }
153
157
  request_body = {"workspaces": workspaces_ids}
154
158
  scan_id = self._post(
155
- PowerBiEndpointFactory.metadata_create_scan(),
159
+ self.endpoint_factory.metadata_create_scan(),
156
160
  params=params,
157
161
  data=request_body,
158
162
  )
@@ -6,13 +6,18 @@ import pytest
6
6
  from .authentication import msal
7
7
  from .client import PowerbiClient
8
8
  from .constants import Keys
9
- from .credentials import PowerbiCredentials
9
+ from .credentials import CLIENT_APP_BASE, REST_API_BASE_PATH, PowerbiCredentials
10
10
  from .endpoints import PowerBiEndpointFactory
11
11
 
12
12
  FAKE_TENANT_ID = "IamFake"
13
13
  FAKE_CLIENT_ID = "MeTwo"
14
14
  FAKE_SECRET = "MeThree"
15
15
 
16
+ ENDPOINT_FACTORY = PowerBiEndpointFactory(
17
+ login_url=CLIENT_APP_BASE,
18
+ api_base=REST_API_BASE_PATH + "/", # superfluous "/" to test resiliency
19
+ )
20
+
16
21
 
17
22
  @pytest.fixture
18
23
  def mock_msal():
@@ -55,7 +60,7 @@ def test__datasets(power_bi_client):
55
60
  with patch.object(power_bi_client, "_get") as mocked_get:
56
61
  mocked_get.return_value = {"value": [{"id": 1, "type": "dataset"}]}
57
62
  datasets = list(power_bi_client._datasets())
58
- mocked_get.assert_called_with(PowerBiEndpointFactory.datasets())
63
+ mocked_get.assert_called_with(ENDPOINT_FACTORY.datasets())
59
64
  assert datasets == [{"id": 1, "type": "dataset"}]
60
65
 
61
66
 
@@ -63,7 +68,7 @@ def test__dashboards(power_bi_client):
63
68
  with patch.object(power_bi_client, "_get") as mocked_get:
64
69
  mocked_get.return_value = {"value": [{"id": 1, "type": "dashboard"}]}
65
70
  dashboards = list(power_bi_client._dashboards())
66
- mocked_get.assert_called_with(PowerBiEndpointFactory.dashboards())
71
+ mocked_get.assert_called_with(ENDPOINT_FACTORY.dashboards())
67
72
  assert dashboards == [{"id": 1, "type": "dashboard"}]
68
73
 
69
74
 
@@ -79,8 +84,8 @@ def test__reports(power_bi_client):
79
84
  ]
80
85
  reports = list(power_bi_client._reports())
81
86
  calls = [
82
- call(PowerBiEndpointFactory.reports()),
83
- call(PowerBiEndpointFactory.pages("1")),
87
+ call(ENDPOINT_FACTORY.reports()),
88
+ call(ENDPOINT_FACTORY.pages("1")),
84
89
  ]
85
90
  mocked_get.assert_has_calls(calls)
86
91
  assert reports == [
@@ -107,7 +112,7 @@ def test__workspace_ids(power_bi_client):
107
112
  }
108
113
 
109
114
  mocked_get.assert_called_with(
110
- PowerBiEndpointFactory.workspace_ids(),
115
+ ENDPOINT_FACTORY.workspace_ids(),
111
116
  params=params,
112
117
  )
113
118
 
@@ -160,7 +165,7 @@ def test__activity_events(power_bi_client):
160
165
  assert result == ["foo", "bar", "baz", "biz"]
161
166
 
162
167
  expected_calls = [
163
- call(endpoint=PowerBiEndpointFactory.activity_events(day=day)),
168
+ call(endpoint=ENDPOINT_FACTORY.activity_events(day=day)),
164
169
  call(endpoint="https://next-call-1"),
165
170
  call(endpoint="https://next-call-2"),
166
171
  ]
@@ -6,6 +6,9 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
6
6
  DEFAULT_SCOPE = "https://analysis.windows.net/powerbi/api/.default"
7
7
  POWERBI_ENV_PREFIX = "CASTOR_POWERBI_"
8
8
 
9
+ CLIENT_APP_BASE = "https://login.microsoftonline.com"
10
+ REST_API_BASE_PATH = "https://api.powerbi.com/v1.0/myorg"
11
+
9
12
 
10
13
  class PowerbiCredentials(BaseSettings):
11
14
  """Class to handle PowerBI rest API permissions"""
@@ -19,9 +22,21 @@ class PowerbiCredentials(BaseSettings):
19
22
  client_id: str
20
23
  tenant_id: str
21
24
  secret: str = Field(repr=False)
25
+ api_base: str = REST_API_BASE_PATH
26
+ login_url: str = CLIENT_APP_BASE
22
27
  scopes: list[str] = [DEFAULT_SCOPE]
23
28
 
24
29
  @field_validator("scopes", mode="before")
25
30
  @classmethod
26
31
  def _check_scopes(cls, scopes: Optional[list[str]]) -> list[str]:
27
32
  return scopes if scopes is not None else [DEFAULT_SCOPE]
33
+
34
+ @field_validator("login_url", mode="before")
35
+ @classmethod
36
+ def _check_login_url(cls, login_url: Optional[str]) -> str:
37
+ return login_url if login_url is not None else CLIENT_APP_BASE
38
+
39
+ @field_validator("api_base", mode="before")
40
+ @classmethod
41
+ def _check_api_base(cls, api_base: Optional[str]) -> str:
42
+ return api_base if api_base is not None else REST_API_BASE_PATH
@@ -1,10 +1,7 @@
1
1
  from datetime import date, datetime
2
2
  from typing import Optional
3
3
 
4
- from ....utils import at_midnight, format_date, yesterday
5
-
6
- _CLIENT_APP_BASE = "https://login.microsoftonline.com"
7
- _REST_API_BASE_PATH = "https://api.powerbi.com/v1.0/myorg"
4
+ from ....utils import at_midnight, clean_path, format_date, yesterday
8
5
 
9
6
 
10
7
  def _time_filter(day: Optional[date]) -> tuple[datetime, datetime]:
@@ -15,51 +12,44 @@ def _time_filter(day: Optional[date]) -> tuple[datetime, datetime]:
15
12
 
16
13
 
17
14
  class PowerBiEndpointFactory:
18
- @classmethod
19
- def activity_events(cls, day: Optional[date]) -> str:
15
+ def __init__(self, login_url: str, api_base: str):
16
+ self.app_base = clean_path(login_url)
17
+ self.rest_api_base = clean_path(api_base)
18
+
19
+ def activity_events(self, day: Optional[date]) -> str:
20
20
  start, end = _time_filter(day)
21
- url = f"{_REST_API_BASE_PATH}/admin/activityevents"
21
+ url = f"{self.rest_api_base}/admin/activityevents"
22
22
  url += "?$filter=Activity eq 'viewreport'"
23
23
  url += f"&startDateTime='{format_date(start)}'"
24
24
  url += f"&endDateTime='{format_date(end)}'"
25
25
  return url
26
26
 
27
- @classmethod
28
- def authority(cls, tenant_id: str) -> str:
29
- return f"{_CLIENT_APP_BASE}/{tenant_id}"
27
+ def authority(self, tenant_id: str) -> str:
28
+ return f"{self.app_base}/{tenant_id}"
30
29
 
31
- @classmethod
32
- def dashboards(cls) -> str:
33
- return f"{_REST_API_BASE_PATH}/admin/dashboards"
30
+ def dashboards(self) -> str:
31
+ return f"{self.rest_api_base}/admin/dashboards"
34
32
 
35
- @classmethod
36
- def datasets(cls) -> str:
37
- return f"{_REST_API_BASE_PATH}/admin/datasets"
33
+ def datasets(self) -> str:
34
+ return f"{self.rest_api_base}/admin/datasets"
38
35
 
39
- @classmethod
40
- def groups(cls) -> str:
41
- return f"{_REST_API_BASE_PATH}/admin/groups"
36
+ def groups(self) -> str:
37
+ return f"{self.rest_api_base}/admin/groups"
42
38
 
43
- @classmethod
44
- def metadata_create_scan(cls) -> str:
45
- return f"{_REST_API_BASE_PATH}/admin/workspaces/getInfo"
39
+ def metadata_create_scan(self) -> str:
40
+ return f"{self.rest_api_base}/admin/workspaces/getInfo"
46
41
 
47
- @classmethod
48
- def metadata_scan_result(cls, scan_id: int) -> str:
49
- return f"{_REST_API_BASE_PATH}/admin/workspaces/scanResult/{scan_id}"
42
+ def metadata_scan_result(self, scan_id: int) -> str:
43
+ return f"{self.rest_api_base}/admin/workspaces/scanResult/{scan_id}"
50
44
 
51
- @classmethod
52
- def metadata_scan_status(cls, scan_id: int) -> str:
53
- return f"{_REST_API_BASE_PATH}/admin/workspaces/scanStatus/{scan_id}"
45
+ def metadata_scan_status(self, scan_id: int) -> str:
46
+ return f"{self.rest_api_base}/admin/workspaces/scanStatus/{scan_id}"
54
47
 
55
- @classmethod
56
- def pages(cls, report_id: str) -> str:
57
- return f"{_REST_API_BASE_PATH}/admin/reports/{report_id}/pages"
48
+ def pages(self, report_id: str) -> str:
49
+ return f"{self.rest_api_base}/admin/reports/{report_id}/pages"
58
50
 
59
- @classmethod
60
- def reports(cls) -> str:
61
- return f"{_REST_API_BASE_PATH}/admin/reports"
51
+ def reports(self) -> str:
52
+ return f"{self.rest_api_base}/admin/reports"
62
53
 
63
- @classmethod
64
- def workspace_ids(cls) -> str:
65
- return f"{_REST_API_BASE_PATH}/admin/workspaces/modified"
54
+ def workspace_ids(self) -> str:
55
+ return f"{self.rest_api_base}/admin/workspaces/modified"
@@ -34,9 +34,7 @@ _BATCH_SIZE: int = 100
34
34
 
35
35
 
36
36
  class URLTemplates(Enum):
37
- DASHBOARD = (
38
- "https://{hostname}/MicroStrategyLibrarySTD/app/{project_id}/{id_}"
39
- )
37
+ DASHBOARD = "https://{hostname}/MicroStrategyLibrary/app/{project_id}/{id_}"
40
38
  DOCUMENT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?documentID={id_}&projectID={project_id}"
41
39
  REPORT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?reportID={id_}&projectID={project_id}"
42
40
  FOLDER = "https://{hostname}/MicroStrategy/servlet/mstrWeb?folderID={id_}&projectID={project_id}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.24.15
3
+ Version: 0.24.21
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -215,6 +215,30 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
215
215
 
216
216
  # Changelog
217
217
 
218
+ ## 0.24.21 - 2025-05-26
219
+
220
+ * Looker Studio: add option to skip the extraction of view activity logs
221
+
222
+ ## 0.24.20 - 2025-05-19
223
+
224
+ * Powerbi: allow custom api base and login url
225
+
226
+ ## 0.24.19 - 2025-05-14
227
+
228
+ * Confluence: extract databases
229
+
230
+ ## 0.24.18 - 2025-05-13
231
+
232
+ * Improve folder organisation for transformation tools
233
+
234
+ ## 0.24.17 - 2025-05-13
235
+
236
+ * Strategy: fix dashboard URL format
237
+
238
+ ## 0.24.16 - 2025-05-12
239
+
240
+ * Confluence: extract folders to complete the page hierarchy
241
+
218
242
  ## 0.24.15 - 2025-05-12
219
243
 
220
244
  * Tableau: Add argument to skip columns extraction
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=6XUz09FfUZSXFfROVG7BliBupfyr5eeBy0J3cQZVvys,17398
1
+ CHANGELOG.md,sha256=tja1IyeA0_DdgOOJLdH8WMTmSRucj5bnvR0G0tg_Nsk,17859
2
2
  Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
3
3
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
4
4
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -10,14 +10,14 @@ castor_extractor/commands/extract_confluence.py,sha256=blYcnDqywXNKRQ1aZAD9FclhL
10
10
  castor_extractor/commands/extract_databricks.py,sha256=SVKyoa-BBUQAM6HRHf1Wdg9-tpICic2yyvXQwHcNBhA,1264
11
11
  castor_extractor/commands/extract_domo.py,sha256=jvAawUsUTHrwCn_koK6StmQr4n_b5GyvJi6uu6WS0SM,1061
12
12
  castor_extractor/commands/extract_looker.py,sha256=cySLiolLCgrREJ9d0kMrJ7P8K3efHTBTzShalWVfI3A,1214
13
- castor_extractor/commands/extract_looker_studio.py,sha256=e79gbyTtCexRz5pg_Pp55GWkXJZWjm6NvVclmvcR0lM,916
13
+ castor_extractor/commands/extract_looker_studio.py,sha256=M7wx8XZScLizCI2vq80aj88vYrdiHChiCiebrrChlZY,1090
14
14
  castor_extractor/commands/extract_metabase_api.py,sha256=NXctea4GT_1iRDitY92nV3TKSqhjEUwYSxwPJMRS3iw,786
15
15
  castor_extractor/commands/extract_metabase_db.py,sha256=tYIhTPPgj1mN-07LyWcL6e-YoGp7HCWda58-5Ukyg_I,1255
16
16
  castor_extractor/commands/extract_mode.py,sha256=Q4iO-VAKMg4zFPejhAO-foZibL5Ht3jsnhWKwJ0oqUU,823
17
17
  castor_extractor/commands/extract_mysql.py,sha256=7AH5qMzeLTsENCOeJwtesrWg8Vo8MCEq8fx2YT74Mcw,1034
18
18
  castor_extractor/commands/extract_notion.py,sha256=uaxcF3_bT7D_-JxnIW0F7VVDphI_ZgOfQQxZzoLXo_M,504
19
19
  castor_extractor/commands/extract_postgres.py,sha256=pX0RnCPi4nw6QQ6wiAuZ_Xt3ZbDuMUG9aQKuqFgJtAU,1154
20
- castor_extractor/commands/extract_powerbi.py,sha256=f0G5w61KXExJ6Sw39_mJIwqQNpLorE5-LKmZXlUqvKI,783
20
+ castor_extractor/commands/extract_powerbi.py,sha256=RKkw9H2ZsbJ4xLE84bmNFUgYUjlrLmSXahQSVrQr_Bc,934
21
21
  castor_extractor/commands/extract_qlik.py,sha256=VBe_xFKh_nR0QSFFIncAaC8yDqBeMa6VunBAga7AeGg,891
22
22
  castor_extractor/commands/extract_redshift.py,sha256=zRBg2D_ft4GLdPSdmetRcgQVAA80DXtdRSYsQhAWIik,1334
23
23
  castor_extractor/commands/extract_salesforce.py,sha256=3j3YTmMkPAwocR-B1ozJQai0UIZPtpmAyWj-hHvdWn4,1226
@@ -43,14 +43,16 @@ castor_extractor/file_checker/templates/__init__.py,sha256=StVLm4ZGyGVmPzarxEaDR
43
43
  castor_extractor/file_checker/templates/generic_warehouse.py,sha256=S5qFIkbfihdWz16y4HSiTfDH1SmGl40u3kZ706sFBVI,2939
44
44
  castor_extractor/knowledge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
45
  castor_extractor/knowledge/confluence/__init__.py,sha256=pRT615pMDlB7Ifs09erVn2EdpZHgkvX5selemWU3VPE,129
46
- castor_extractor/knowledge/confluence/assets.py,sha256=zv2G2LB8H0fKDbVJ4kHrAjbqehXI_K-wgd_ghSXGFvs,144
46
+ castor_extractor/knowledge/confluence/assets.py,sha256=ZRmRnJpbujL8LMQD3FrcFME-pEQM7G1lCZ1abTsx5OU,196
47
47
  castor_extractor/knowledge/confluence/client/__init__.py,sha256=ALAzo0JEhxFzH2FnIO6HmtkAGS2_bGY8KXXMcTGV3aE,84
48
- castor_extractor/knowledge/confluence/client/client.py,sha256=ihdagtAEgIcO5MmX5-coGEJkUg7_Tw1_7Vl50NDorhE,3731
49
- castor_extractor/knowledge/confluence/client/client_test.py,sha256=LTT49ORl0DPTdDpKdREUErnwIA40xPy2C3uwdkVS1I0,3071
48
+ castor_extractor/knowledge/confluence/client/client.py,sha256=Ysl4KDo4ISXuLvUwEZrowGiaUHPC5cli7zN5Vtte_Jc,5392
49
+ castor_extractor/knowledge/confluence/client/client_test.py,sha256=O4YarFiZbm2z_H0zPzPi-awRhsGEY1iOwwHHTv9gCVA,4177
50
50
  castor_extractor/knowledge/confluence/client/credentials.py,sha256=tqUMw-SVoAi4o6I6OeGk4MeDiIPU3-ihhaomXv4CQ64,419
51
- castor_extractor/knowledge/confluence/client/endpoints.py,sha256=ClBzE8a5zqA4ngAecc8vMv9QJCdbtcv3GKuorZ8kOdA,1100
51
+ castor_extractor/knowledge/confluence/client/endpoints.py,sha256=q5JCybVPtSs4rEXmS5IdkJyFQZsx2ff838mJyxryEFo,1742
52
52
  castor_extractor/knowledge/confluence/client/pagination.py,sha256=ty4meiMEujDVSiQyOJTibd-ReYyDyGezdFuk7EAGtMA,862
53
- castor_extractor/knowledge/confluence/extract.py,sha256=mOAs5uvjM0LZJzrD36uFAt_nsiqQ48kKTDFIKru3LSo,1858
53
+ castor_extractor/knowledge/confluence/extract.py,sha256=Ba1IDDFkZUYJ2HDWNcXNJ1fzFYOTcE1sTDgqFAK9-fA,2332
54
+ castor_extractor/knowledge/confluence/utils.py,sha256=-lcpWY8oacDdg537s5-LsY8B3L7ZKGXT6GCd3yvVw-A,392
55
+ castor_extractor/knowledge/confluence/utils_test.py,sha256=NYlbKUSRjuZ8FaRMqhKNQCW3KmMAhqIVaMAHNNpobsk,1152
54
56
  castor_extractor/knowledge/notion/__init__.py,sha256=ZDmh0eNSxHf1zVPm0aYlKPci-vzOXhAgdsWjS2hdjh4,117
55
57
  castor_extractor/knowledge/notion/assets.py,sha256=QHv1-pomt5UeN_prP2L6t_zJ-tDSqB8LgopkGAODYPQ,164
56
58
  castor_extractor/knowledge/notion/client/__init__.py,sha256=CDPorBCethuNTEtpjvHGcWnWeVfqkEq-IbakWjDKATw,76
@@ -80,6 +82,11 @@ castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9
80
82
  castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
81
83
  castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
82
84
  castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
85
+ castor_extractor/transformation/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
86
+ castor_extractor/transformation/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
87
+ castor_extractor/transformation/dbt/client.py,sha256=KSlMHaesDOTYjTKs_ZFSYG1Udxjj5jn0m5zaCf5AZZk,5526
88
+ castor_extractor/transformation/dbt/client_test.py,sha256=YK86romOhTRFqzA2Gs262mDUfmvz0cmhiN3jC9nB8P8,4527
89
+ castor_extractor/transformation/dbt/credentials.py,sha256=pGq7GqFQTw9TwN1DXSHC-0yJ2H6B_wMAbHyQTLqJVh0,543
83
90
  castor_extractor/types.py,sha256=nHel2hv6NoHmdpOX_heEfO2-DnZPoYA2x0eJdbFvT0s,1276
84
91
  castor_extractor/uploader/__init__.py,sha256=A4bq_SrEtKAsl0r_D_duSTvL5WIQjVfsMy7tDx9IKg0,87
85
92
  castor_extractor/uploader/constant.py,sha256=yTigLHDlYwoRr6CpFIl7ReElFsQd4H-qkluMZJPWSx0,865
@@ -89,7 +96,7 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
89
96
  castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
90
97
  castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
91
98
  castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
92
- castor_extractor/utils/__init__.py,sha256=KQkr_CmxWG0Vpu7CaqjbJkffUeEWcyeA9Cbm394Hygk,1585
99
+ castor_extractor/utils/__init__.py,sha256=ybzci46Myi9LABZGBK2qXOiGd00llcpmBpvAp5LEVHc,1597
93
100
  castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
94
101
  castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
95
102
  castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
@@ -114,11 +121,6 @@ castor_extractor/utils/client/uri_test.py,sha256=1XKF6qSseCeD4G4ckaNO07JXfGbt7XU
114
121
  castor_extractor/utils/collection.py,sha256=FiIJWZZ865oqNjtTm40gQ13R9zh--W2W5YsMBZJf2bk,2334
115
122
  castor_extractor/utils/collection_test.py,sha256=XJAGo0Veg0H8wZRCESIkU2t8bXxTNET0BdosomO3-Ls,2104
116
123
  castor_extractor/utils/constants.py,sha256=qBQprS9U66mS-RIBXiLujdTSV3WvGv40Bc0khP4Abdk,39
117
- castor_extractor/utils/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
118
- castor_extractor/utils/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
119
- castor_extractor/utils/dbt/client.py,sha256=KSlMHaesDOTYjTKs_ZFSYG1Udxjj5jn0m5zaCf5AZZk,5526
120
- castor_extractor/utils/dbt/client_test.py,sha256=9mHhFLyQ-NrjyRKy0kCEp4hgMj2um7HrhTd452oyRbM,4526
121
- castor_extractor/utils/dbt/credentials.py,sha256=pGq7GqFQTw9TwN1DXSHC-0yJ2H6B_wMAbHyQTLqJVh0,543
122
124
  castor_extractor/utils/deprecate.py,sha256=aBIN2QqZUx5CBNZMFfOUhi8QqtPqRcJtmrN6xqfm-y8,805
123
125
  castor_extractor/utils/env.py,sha256=TqdtB50U8LE0993WhhEhpy89TJrHbjtIKjvg6KQ-5q0,596
124
126
  castor_extractor/utils/files.py,sha256=qKbfu5FRjsQdKnRmaJNd5EdX_F6gf5C5tV8LdoYKxs0,1527
@@ -153,7 +155,7 @@ castor_extractor/utils/string_test.py,sha256=u3P2tAPhyfCLvD19rH_JcpHhPuWTHUdg0z_
153
155
  castor_extractor/utils/time.py,sha256=jmP1QWg4lv21Jp_Oy71lfJ47hjNOSgHiBOFf964RMPU,1732
154
156
  castor_extractor/utils/time_test.py,sha256=pH8DSosNlwDYZXZNNjYDcL0WbmZc_c212LEEn88Oqew,647
155
157
  castor_extractor/utils/type.py,sha256=Sd8JlEgbGkBUZnRqCUDtREeBkOMTXtlNMyCph90_J0Q,328
156
- castor_extractor/utils/validation.py,sha256=kQAFtqt3gfy7YqYQ0u-60vyNYUF_96he5QDVUQnZmDo,1896
158
+ castor_extractor/utils/validation.py,sha256=dRvC9SoFVecVZuLQNN3URq37yX2sBSW3-NxIxkcol5o,1894
157
159
  castor_extractor/utils/validation_test.py,sha256=A7P6VmI0kYX2aGIeEN12y7LsY7Kpm8pE4bdVFhbBAMw,1184
158
160
  castor_extractor/utils/write.py,sha256=Z_RYm47XeHiUPPUMYMuAjQrVZ18CAkL3daQHQG1XPlM,2148
159
161
  castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -184,18 +186,17 @@ castor_extractor/visualization/looker/fields.py,sha256=7oC7p-3Wp7XHBP_FT_D1wH3kI
184
186
  castor_extractor/visualization/looker/fields_test.py,sha256=7Cwq8Qky6aTZg8nCHp1gmPJtd9pGNB4QeMIRRWdHo5w,782
185
187
  castor_extractor/visualization/looker/multithreading.py,sha256=Muuh3usBLqtv3sfHoyPYJ6jJ7V5ajR6N9ZJ_F-bNc60,2608
186
188
  castor_extractor/visualization/looker_studio/__init__.py,sha256=GccG-GJXoNhjXFPkw-rHHZ0SXVQTFKjqkMIYHVeu3T4,175
187
- castor_extractor/visualization/looker_studio/assets.py,sha256=lFIqr8EB6eK-Mf80R_x2qAscCyX7ZUcOcHVef1CM9B0,173
189
+ castor_extractor/visualization/looker_studio/assets.py,sha256=rI73rbVrfwkkepqZr0zPouP2lPUfJxSi21RKtOTHtAA,308
188
190
  castor_extractor/visualization/looker_studio/client/__init__.py,sha256=YkQaVDJa-7KSwdOLjtgKJMRiafbGNKC_46YVx0hYZ1Q,129
189
- castor_extractor/visualization/looker_studio/client/admin_sdk_client.py,sha256=hYKdU6TlWKkXx07r6HsZ4Wbxhasx8DP_jO6iDCjHjgk,3508
191
+ castor_extractor/visualization/looker_studio/client/admin_sdk_client.py,sha256=HIeyT9JTW1TPwVzD2Q-VfJ99jMP80Z-4CznKAnTnp2w,3493
190
192
  castor_extractor/visualization/looker_studio/client/client.py,sha256=6sTfLRUhuxhkqDjC2ZBEaw6YnR6ze8-_VW2rc1u9Ksk,3191
191
- castor_extractor/visualization/looker_studio/client/credentials.py,sha256=QImJPh8VctkrGt65UiU5hM12JI4WdCMSUFt88aiOoLw,657
193
+ castor_extractor/visualization/looker_studio/client/credentials.py,sha256=F4ISI8Ua_HJsMuGhYql28o3hKYR4sL_uzkrUkRiekRo,1347
192
194
  castor_extractor/visualization/looker_studio/client/endpoints.py,sha256=5eY-ffqNDdlDBOOpiF7LpjyHMrzeClJktidCr1pTDUs,669
193
195
  castor_extractor/visualization/looker_studio/client/enums.py,sha256=fHgemTaQpnwee8cw1YQVDsVnH--vTyFwT4Px8aVYYHQ,167
194
- castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=oySC6rsppj67RSifxwSCw4bFrz1Irx6IFJhX7tc_v1E,4087
196
+ castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=Phq378VEaFLD-nyP2_A1wge6HUP45jSthhlNjD7aqSg,4085
195
197
  castor_extractor/visualization/looker_studio/client/pagination.py,sha256=9HQ3Rkdiz2VB6AvYtZ0F-WouiD0pMmdZyAmkv-3wh08,783
196
198
  castor_extractor/visualization/looker_studio/client/queries/query.sql,sha256=Ub4rdrJ5WTPWKI-eVmXrNMv0Ktmti4b-93zZBr0xEB0,1426
197
- castor_extractor/visualization/looker_studio/client/scopes.py,sha256=824cqqgZuGq4L-rPNoHJe0ibXsxkRwB0CLG_kqw9Q0g,256
198
- castor_extractor/visualization/looker_studio/extract.py,sha256=cHyroNZ1fKoBTvIbEebnKDrU3xpkcEgIPJy75ljCL70,2607
199
+ castor_extractor/visualization/looker_studio/extract.py,sha256=uNpvg4wtFflmpkqXFfo_9Nm12AEKXBOCKKajIggySho,3026
199
200
  castor_extractor/visualization/metabase/__init__.py,sha256=3E36cmkMyEgBB6Ot5rWk-N75i0G-7k24QTlc-Iol4pM,193
200
201
  castor_extractor/visualization/metabase/assets.py,sha256=nu3FwQBU_hdS2DBvgXAwQlEEi76QiNK2tMKEtMyctaY,2874
201
202
  castor_extractor/visualization/metabase/client/__init__.py,sha256=KBvaPMofBRV3m_sZAnKNCrJGr-Z88EbpdzEzWPQ_uBk,99
@@ -231,16 +232,16 @@ castor_extractor/visualization/mode/client/constants.py,sha256=_Si5AF6VnpoSfnNNg
231
232
  castor_extractor/visualization/mode/client/credentials.py,sha256=ptIpCCpoNt06yYaWQgl3Xu78_jVMoqsqWAGqQXVFZlo,606
232
233
  castor_extractor/visualization/mode/errors.py,sha256=SKpFT2AiLOuWx2VRLyO7jbAiKcGDFXXrsebpNEKtr0E,1495
233
234
  castor_extractor/visualization/mode/extract.py,sha256=PmLWWjUwplQh3TNMemiGwyFdxMcKVMvumZPxSMLJAwk,1625
234
- castor_extractor/visualization/powerbi/__init__.py,sha256=AJnmfdmm2mGaInWJkUfZxRqrI7dBkTUSebpow05g5zo,135
235
+ castor_extractor/visualization/powerbi/__init__.py,sha256=hoZ73ngLhMc9edqxO9PUIE3FABQlvcfY2W8fuc6DEjY,197
235
236
  castor_extractor/visualization/powerbi/assets.py,sha256=IB_XKwgdN1pZYGZ4RfeHrLjflianTzWf_6tg-4CIwu0,742
236
- castor_extractor/visualization/powerbi/client/__init__.py,sha256=8Bzhd9Z0ebVg2gDchXCOPa80Yqlq_9oCjbGi8u1M6J0,93
237
- castor_extractor/visualization/powerbi/client/authentication.py,sha256=fz0v9qxeADwA1jiS9UzAQN5mA5kmZT53onlcWon2RGw,892
238
- castor_extractor/visualization/powerbi/client/client.py,sha256=Y_rwkyPO3GbTAtaOClqnGVEK5iWmREIuDKDJx0LefHs,7203
239
- castor_extractor/visualization/powerbi/client/client_test.py,sha256=6NtpcKZCxBWyJO3phnVgE70Wmunb6tWsdXikkReJ02E,5539
237
+ castor_extractor/visualization/powerbi/client/__init__.py,sha256=UPIhMaCCdNxhiLdkItC0IPFE_AMi-SgqI_ahwjB9utI,151
238
+ castor_extractor/visualization/powerbi/client/authentication.py,sha256=cTohunKr1nUDfvxB0sejJSyfE2BdCtwT1WMPecWlbyU,1045
239
+ castor_extractor/visualization/powerbi/client/client.py,sha256=MbqqUF4yadjbAQ_I0iwOmdqR0qC1L3yfbDGgIZqZ0hQ,7348
240
+ castor_extractor/visualization/powerbi/client/client_test.py,sha256=MhQfg6kj__zpARXfv9-VrJXt1fXj6Eri91y8KA9Sn9E,5694
240
241
  castor_extractor/visualization/powerbi/client/constants.py,sha256=88R_aGachNNUZh6OSH2fkDwZtY4KTStzKm_g7HNCqqo,387
241
- castor_extractor/visualization/powerbi/client/credentials.py,sha256=sVi4ecJP8ydfrGRKKdJML-wxxZjxUshQtyqTUaJYq_g,795
242
+ castor_extractor/visualization/powerbi/client/credentials.py,sha256=OVWdhZSNODzTdLysY-sbpBZ3uUkLokeayQZnbJAqt2I,1386
242
243
  castor_extractor/visualization/powerbi/client/credentials_test.py,sha256=TzFqxsWVQ3sXR_n0bJsexK9Uz7ceXCEPVqDGWTJzW60,993
243
- castor_extractor/visualization/powerbi/client/endpoints.py,sha256=DrAFpYHhp9Z7fxebdy_Ir6LFfFluHRBJId4tVCYTUVs,2051
244
+ castor_extractor/visualization/powerbi/client/endpoints.py,sha256=38ZETzSSnNq3vA9O6nLZQ8T1BVE01R9CjMC03-PRXsM,1911
244
245
  castor_extractor/visualization/powerbi/client/pagination.py,sha256=OZMjoDQPRGMoWd9QcKKrPh3aErJR20SHlrTqY_siLkk,755
245
246
  castor_extractor/visualization/powerbi/extract.py,sha256=Z5KbqMhMnqjWcnzged2G1-Gf6GYWJobTL9_TpAdgb8o,1309
246
247
  castor_extractor/visualization/qlik/__init__.py,sha256=u6lIfm_WOykBwt6SlaB7C0Dtx37XBliUbM5oWv26gC8,177
@@ -277,7 +278,7 @@ castor_extractor/visualization/sigma/extract.py,sha256=XIT1qsj6g6dgBWP8HPfj_medZ
277
278
  castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
278
279
  castor_extractor/visualization/strategy/assets.py,sha256=tqB3GOtp-r7IOnYO8UxZgrldoSMImJnv5KeIwDFxg68,302
279
280
  castor_extractor/visualization/strategy/client/__init__.py,sha256=XWP0yF5j6JefDJkDfX-RSJn3HF2ceQ0Yx1PLCfB3BBo,80
280
- castor_extractor/visualization/strategy/client/client.py,sha256=F7taX0jSQpM8R3GOGeUQ7U_bJKkoHTwAc9oyc3ZDxbM,10261
281
+ castor_extractor/visualization/strategy/client/client.py,sha256=_K7JkatG0DYtbQOJULTNYKHWuBZ11KMR_rQjx8LiR5c,10242
281
282
  castor_extractor/visualization/strategy/client/credentials.py,sha256=urFfNxWX1JG6wwFMYImufQzHa5g-sgjdlVGzi63owwg,1113
282
283
  castor_extractor/visualization/strategy/extract.py,sha256=2fBuvS2xiOGXRpxXnZsE_C3en6t1-BlM5TbusjHyEkg,1166
283
284
  castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
@@ -424,8 +425,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
424
425
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
425
426
  castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
426
427
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
427
- castor_extractor-0.24.15.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
428
- castor_extractor-0.24.15.dist-info/METADATA,sha256=ktAgO-d5jJmInoD_VCLwIT522Qy31paP3Smh_TGa6MI,24851
429
- castor_extractor-0.24.15.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
430
- castor_extractor-0.24.15.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
431
- castor_extractor-0.24.15.dist-info/RECORD,,
428
+ castor_extractor-0.24.21.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
429
+ castor_extractor-0.24.21.dist-info/METADATA,sha256=Yg7Sgskg-uUeas31S1Uit1F7L1tPcVCLFy2U3rBlzIY,25312
430
+ castor_extractor-0.24.21.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
431
+ castor_extractor-0.24.21.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
432
+ castor_extractor-0.24.21.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- SCOPES = (
2
- "https://www.googleapis.com/auth/datastudio",
3
- "https://www.googleapis.com/auth/userinfo.profile",
4
- "https://www.googleapis.com/auth/admin.reports.audit.readonly",
5
- "https://www.googleapis.com/auth/admin.directory.user.readonly",
6
- )