castor-extractor 0.24.10__py3-none-any.whl → 0.24.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +12 -0
- README.md +5 -2
- castor_extractor/commands/extract_strategy.py +26 -0
- castor_extractor/visualization/strategy/__init__.py +3 -0
- castor_extractor/visualization/strategy/assets.py +14 -0
- castor_extractor/visualization/strategy/client/__init__.py +2 -0
- castor_extractor/visualization/strategy/client/client.py +313 -0
- castor_extractor/visualization/strategy/client/credentials.py +38 -0
- castor_extractor/visualization/strategy/extract.py +43 -0
- castor_extractor/warehouse/redshift/queries/view_ddl.sql +19 -5
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.13.dist-info}/METADATA +21 -4
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.13.dist-info}/RECORD +15 -8
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.13.dist-info}/entry_points.txt +1 -0
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.13.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.10.dist-info → castor_extractor-0.24.13.dist-info}/WHEEL +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.13 - 2025-05-05
|
|
4
|
+
|
|
5
|
+
* Rollback cloud-storage version as it's not compatible with Keboola
|
|
6
|
+
|
|
7
|
+
## 0.24.12 - 2025-05-05
|
|
8
|
+
|
|
9
|
+
* Redshift - fix query definition of materialized views
|
|
10
|
+
|
|
11
|
+
## 0.24.11 - 2025-05-05
|
|
12
|
+
|
|
13
|
+
* add support for Strategy (formerly MicroStrategy)
|
|
14
|
+
|
|
3
15
|
## 0.24.10 - 2025-04-30
|
|
4
16
|
|
|
5
17
|
* Tableau - skip warnings instead of raising an error
|
README.md
CHANGED
|
@@ -37,6 +37,8 @@ It also embeds utilities to help you push your metadata to Castor:
|
|
|
37
37
|
Requirements: **python3.9+**
|
|
38
38
|
<img src="https://upload.wikimedia.org/wikipedia/commons/c/c3/Python-logo-notext.svg" width=20 />
|
|
39
39
|
|
|
40
|
+
**Note:** The Strategy command requires **python3.10+**. All other modules work with python3.9+.
|
|
41
|
+
|
|
40
42
|
### Create castor-env
|
|
41
43
|
|
|
42
44
|
We advise creating a dedicated [Python environment](https://docs.python.org/3/library/venv.html).
|
|
@@ -115,6 +117,7 @@ pip install castor-extractor[postgres]
|
|
|
115
117
|
pip install castor-extractor[redshift]
|
|
116
118
|
pip install castor-extractor[snowflake]
|
|
117
119
|
pip install castor-extractor[sqlserver]
|
|
120
|
+
pip install castor-extractor[strategy]
|
|
118
121
|
pip install castor-extractor[tableau]
|
|
119
122
|
```
|
|
120
123
|
|
|
@@ -138,6 +141,6 @@ export CASTOR_OUTPUT_DIRECTORY="/tmp/castor"
|
|
|
138
141
|
|
|
139
142
|
## Contact
|
|
140
143
|
|
|
141
|
-
For any questions or bug report, contact us at [support@
|
|
144
|
+
For any questions or bug reports, contact us at [support@coalesce.io](mailto:support@coalesce.io)
|
|
142
145
|
|
|
143
|
-
[
|
|
146
|
+
[Catalog from Coalesce](https://castordoc.com) helps you find, understand, and use your data assets
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from argparse import ArgumentParser
|
|
3
|
+
|
|
4
|
+
from castor_extractor.utils import parse_filled_arguments # type: ignore
|
|
5
|
+
from castor_extractor.visualization import strategy # type: ignore
|
|
6
|
+
|
|
7
|
+
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main():
    """
    Command-line entry point for the Strategy extraction.

    Declares the CLI options, then forwards every option that was actually
    provided to `strategy.extract_all` as keyword arguments.
    """
    parser = ArgumentParser()

    # (short flag, long flag, help text) for options taking a single value
    single_value_options = (
        ("-u", "--username", "Strategy username"),
        ("-p", "--password", "Strategy password"),
        ("-b", "--base-url", "Strategy instance URL"),
        ("-o", "--output", "Directory to write to"),
    )
    for short_flag, long_flag, description in single_value_options:
        parser.add_argument(short_flag, long_flag, help=description)

    # zero or more project IDs; omitted entirely when the flag is absent
    parser.add_argument(
        "-i",
        "--project-ids",
        nargs="*",
        default=None,
        help="Optional list of project IDs",
    )

    filled_arguments = parse_filled_arguments(parser)
    strategy.extract_all(**filled_arguments)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from ...types import ExternalAsset
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class StrategyAsset(ExternalAsset):
    """
    Strategy assets that can be extracted.

    Each member's value is the lowercase identifier of the asset type.
    NOTE(review): ExternalAsset is declared elsewhere; presumably an Enum
    base, since the extraction iterates over this class — confirm.
    """

    # Declaration order matters: iterating over the class visits members
    # in this order.
    ATTRIBUTE = "attribute"
    CUBE = "cube"
    DASHBOARD = "dashboard"
    DOCUMENT = "document"
    FACT = "fact"
    METRIC = "metric"
    REPORT = "report"
    USER = "user"
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
|
|
7
|
+
from mstrio.connection import Connection # type: ignore
|
|
8
|
+
from mstrio.helpers import IServerError # type: ignore
|
|
9
|
+
from mstrio.modeling import ( # type: ignore
|
|
10
|
+
list_attributes,
|
|
11
|
+
list_facts,
|
|
12
|
+
list_metrics,
|
|
13
|
+
)
|
|
14
|
+
from mstrio.project_objects import ( # type: ignore
|
|
15
|
+
Report,
|
|
16
|
+
list_dashboards,
|
|
17
|
+
list_documents,
|
|
18
|
+
list_olap_cubes,
|
|
19
|
+
list_reports,
|
|
20
|
+
)
|
|
21
|
+
from mstrio.server import Environment # type: ignore
|
|
22
|
+
from mstrio.types import ObjectSubTypes, ObjectTypes # type: ignore
|
|
23
|
+
from mstrio.users_and_groups import User, list_users # type: ignore
|
|
24
|
+
from mstrio.utils.entity import Entity # type: ignore
|
|
25
|
+
from mstrio.utils.helper import is_dashboard # type: ignore
|
|
26
|
+
from pydantic import BaseModel, ConfigDict
|
|
27
|
+
|
|
28
|
+
from ..assets import StrategyAsset
|
|
29
|
+
from .credentials import StrategyCredentials
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
# Page size used when paginating through an entity's dependencies
_BATCH_SIZE: int = 100
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class URLTemplates(Enum):
    """
    URL templates used to build links to Strategy objects.

    Every template expects three placeholders to be filled with
    `str.format`: `hostname`, `project_id` and `id_`.
    """

    # Dashboards use a different path (MicroStrategyLibrarySTD) than the
    # other templates
    DASHBOARD = (
        "https://{hostname}/MicroStrategyLibrarySTD/app/{project_id}/{id_}"
    )
    # The templates below all go through the mstrWeb servlet and differ
    # only by the name of the ID query parameter
    DOCUMENT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?documentID={id_}&projectID={project_id}"
    REPORT = "https://{hostname}/MicroStrategy/servlet/mstrWeb?reportID={id_}&projectID={project_id}"
    FOLDER = "https://{hostname}/MicroStrategy/servlet/mstrWeb?folderID={id_}&projectID={project_id}"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _is_dashboard(entity: Entity) -> bool:
    """
    Tells whether the entity is a Dashboard.

    Dashboards and Documents share the same object type; only the
    `view_media` property tells them apart.
    """
    if not (entity.type == ObjectTypes.DOCUMENT_DEFINITION):
        # not even a document-like object: `view_media` is never read
        return False

    return is_dashboard(entity.view_media)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _is_report(entity: Entity) -> bool:
    """
    Tells whether the entity is a Report.

    Reports and Cubes share the same object type, so the subtype is
    needed to rule out Cubes.
    """
    has_report_type = entity.type == ObjectTypes.REPORT_DEFINITION
    has_cube_subtype = entity.subtype == ObjectSubTypes.OLAP_CUBE.value

    if not has_report_type:
        return False

    return not has_cube_subtype
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _safe_get_attribute(entity: Entity, attribute: str) -> Optional[str]:
    """
    Reads `attribute` from the entity without letting a server error escape.

    Accessing some properties can fail on the server side, e.g. the `sql`
    of a Report that has not been published. In that case the error is
    logged and None is returned instead of the attribute value.
    """
    try:
        return getattr(entity, attribute)
    except IServerError as e:
        logger.error(f"Could not get {attribute} for entity {entity.id}: {e}")
        return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class Dependency(BaseModel):
    """Fields kept for each dependency of an entity."""

    id: str
    name: str
    subtype: int
    type: int

    # keys other than the four fields above are dropped rather than rejected
    model_config = ConfigDict(extra="ignore")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _list_dependencies(entity: Entity) -> list[dict]:
    """
    Collects every dependency of the entity, page by page, and reduces
    each one to the fields declared on `Dependency`.
    """
    raw_dependencies: list[dict] = []

    start = 0
    page = entity.list_dependencies(offset=start, limit=_BATCH_SIZE)
    raw_dependencies.extend(page)

    # a full page means there may be more to fetch
    while len(page) >= _BATCH_SIZE:
        start += _BATCH_SIZE
        page = entity.list_dependencies(offset=start, limit=_BATCH_SIZE)
        raw_dependencies.extend(page)

    trimmed: list[dict] = []
    for raw in raw_dependencies:
        trimmed.append(Dependency(**raw).model_dump())
    return trimmed
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _level_1_folder_id(folders: list[dict]) -> str:
|
|
104
|
+
"""Searches for the first enclosing folder and returns its ID."""
|
|
105
|
+
for folder in folders:
|
|
106
|
+
if folder["level"] == 1:
|
|
107
|
+
return folder["id"]
|
|
108
|
+
|
|
109
|
+
raise ValueError("No level 1 folder found")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class StrategyClient:
    """Connect to Strategy through mstrio-py and fetch main assets."""

    def __init__(self, credentials: StrategyCredentials):
        """
        Opens the connection and resolves the list of projects to extract.

        When the credentials carry no project IDs, all projects listed by
        the environment are extracted.
        """
        self.base_url = credentials.base_url
        self.connection = Connection(
            base_url=self.base_url,
            username=credentials.username,
            password=credentials.password,
        )

        # only the hostname is needed to build asset URLs
        self.hostname = urlparse(self.base_url).hostname

        if credentials.project_ids:
            self.project_ids = credentials.project_ids
        else:
            env = Environment(connection=self.connection)
            self.project_ids = [project.id for project in env.list_projects()]

    def close(self):
        """Closes the underlying connection."""
        self.connection.close()

    def _url(self, entity: Entity) -> str:
        """
        Formats the right URL.
        * Dashboards : viewed in MicroStrategy
        * Reports and Documents : viewed in MicroStrategy Web
        * other (i.e. Cubes): the URL leads to the folder in MicroStrategy Web
        """
        if _is_dashboard(entity):
            id_ = entity.id
            template = URLTemplates.DASHBOARD

        elif entity.type == ObjectTypes.DOCUMENT_DEFINITION:
            id_ = entity.id
            template = URLTemplates.DOCUMENT

        elif _is_report(entity):
            id_ = entity.id
            template = URLTemplates.REPORT

        else:
            # default to folder URL
            id_ = _level_1_folder_id(entity.ancestors)
            template = URLTemplates.FOLDER

        return template.value.format(
            hostname=self.hostname,
            id_=id_,
            project_id=entity.project_id,
        )

    def _common_entity_properties(
        self,
        entity: Entity,
        with_url: bool = True,
        with_description: bool = True,
    ) -> dict:
        """
        Returns the entity's properties, including its dependencies
        and optional URL and/or description.
        """
        dependencies = _list_dependencies(entity)
        # the owner is only kept when it is a User object
        owner_id = entity.owner.id if isinstance(entity.owner, User) else None
        properties = {
            "dependencies": dependencies,
            "id": entity.id,
            "location": entity.location,
            "name": entity.name,
            "owner_id": owner_id,
            "subtype": entity.subtype,
            "type": entity.type.value,
        }

        if with_url:
            properties["url"] = self._url(entity)

        if with_description:
            properties["description"] = _safe_get_attribute(
                entity, "description"
            )

        return properties

    def _report_properties(self, report: Report) -> dict[str, Any]:
        """
        Report properties contain an optional SQL source query. Due to a typing
        bug in the mstrio package, the typing must be ignored.
        """
        # the common properties already include the URL (with_url defaults
        # to True), so it is not computed a second time here
        properties = self._common_entity_properties(report)  # type: ignore
        properties["sql"] = _safe_get_attribute(report, "sql")  # type: ignore
        return properties

    @staticmethod
    def _user_properties(user: User) -> dict[str, Any]:
        """Returns the identification fields kept for a user."""
        return {
            "id": user.id,
            "name": user.name,
            "username": user.username,
            "email": user.default_email_address,
        }

    def _fetch_entities(
        self,
        extract_callback: Callable,
        with_url: bool = True,
        with_description: bool = True,
        custom_property_extractor: Optional[Callable] = None,
    ) -> Iterator[dict[str, Any]]:
        """
        Yields all entities across all projects using the given retrieval function from the mstrio package.

        Entities whose properties cannot be read because of a server error
        are logged and skipped. When `custom_property_extractor` is given,
        it replaces the common property extraction, and `with_url` /
        `with_description` are not used.
        """
        for project_id in self.project_ids:
            self.connection.select_project(project_id=project_id)

            entities = extract_callback(connection=self.connection)

            for entity in entities:
                # only the property extraction is guarded; the yield stays
                # outside the try so the handler cannot catch anything else
                try:
                    if custom_property_extractor:
                        properties = custom_property_extractor(entity)
                    else:
                        properties = self._common_entity_properties(
                            entity,
                            with_url=with_url,
                            with_description=with_description,
                        )
                except IServerError as e:
                    logger.error(
                        f"Could not fetch attributes for entity {entity.id}: {e}"
                    )
                    continue

                yield properties

    def _fetch_attributes(self) -> Iterator[dict[str, Any]]:
        """Yields all attributes, without URL."""
        return self._fetch_entities(
            list_attributes,
            with_url=False,
        )

    def _fetch_cubes(self) -> Iterator[dict[str, Any]]:
        """Yields all OLAP cubes."""
        return self._fetch_entities(list_olap_cubes)

    def _fetch_dashboards(self) -> Iterator[dict[str, Any]]:
        """Yields all dashboards."""
        return self._fetch_entities(list_dashboards)

    def _fetch_documents(self) -> Iterator[dict[str, Any]]:
        """Yields all documents."""
        return self._fetch_entities(list_documents)

    def _fetch_facts(self) -> Iterator[dict[str, Any]]:
        """Yields all facts. Descriptions are not needed for this entity type."""
        return self._fetch_entities(
            list_facts,
            with_url=False,
            with_description=False,
        )

    def _fetch_metrics(self) -> Iterator[dict[str, Any]]:
        """Yields all metrics, without URL."""
        return self._fetch_entities(
            list_metrics,
            with_url=False,
        )

    def _fetch_reports(self) -> Iterator[dict[str, Any]]:
        """Yields all reports, including their optional SQL source query."""
        return self._fetch_entities(
            list_reports,
            custom_property_extractor=self._report_properties,
        )

    def _fetch_users(self) -> Iterator[dict[str, Any]]:
        """Yields users, reduced to their identification fields."""
        # NOTE(review): users are listed once per selected project; if users
        # are not project-scoped this yields duplicates — confirm.
        return self._fetch_entities(
            list_users,
            custom_property_extractor=self._user_properties,
        )

    def fetch(self, asset: StrategyAsset):
        """
        Fetch the given asset type from Strategy

        This is a generator: nothing is requested, and NotImplementedError
        is not raised for an unknown asset, until iteration starts.
        """
        if asset == StrategyAsset.ATTRIBUTE:
            yield from self._fetch_attributes()

        elif asset == StrategyAsset.CUBE:
            yield from self._fetch_cubes()

        elif asset == StrategyAsset.DASHBOARD:
            yield from self._fetch_dashboards()

        elif asset == StrategyAsset.DOCUMENT:
            yield from self._fetch_documents()

        elif asset == StrategyAsset.FACT:
            yield from self._fetch_facts()

        elif asset == StrategyAsset.METRIC:
            yield from self._fetch_metrics()

        elif asset == StrategyAsset.REPORT:
            yield from self._fetch_reports()

        elif asset == StrategyAsset.USER:
            yield from self._fetch_users()

        else:
            raise NotImplementedError(f"Asset type {asset} not implemented yet")
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
from pydantic import Field, field_validator
|
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
|
+
|
|
6
|
+
# Prefix of the environment variables the Strategy credentials are read from
STRATEGY_ENV_PREFIX = "CATALOG_STRATEGY_"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StrategyCredentials(BaseSettings):
    """
    Credentials needed to connect to a Strategy instance.

    Values can be passed explicitly or read from environment variables
    prefixed with `STRATEGY_ENV_PREFIX`. Unknown keys are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix=STRATEGY_ENV_PREFIX,
        extra="ignore",
        populate_by_name=True,
    )

    base_url: str
    # kept out of the model's repr so it does not leak into logs
    password: str = Field(repr=False)
    username: str

    # optional restriction of the extraction to some projects
    project_ids: Optional[list[str]] = None

    @field_validator("project_ids", mode="before")
    @classmethod
    def _check_project_ids(cls, project_ids: Any) -> Optional[list[str]]:
        """
        The project IDs are optional and can be either a list of strings
        or single string with project IDs separated by commas.

        Empty segments of a comma-separated string (empty string, doubled
        or trailing comma) are dropped, so they never turn into an empty
        project ID.

        Raises:
            ValueError: when the value is neither None, a string nor a list.
        """
        if project_ids is None:
            return None

        if isinstance(project_ids, str):
            stripped = (item.strip() for item in project_ids.split(","))
            return [item for item in stripped if item]

        if isinstance(project_ids, list):
            return project_ids

        raise ValueError(f"Unexpected type for project_id: {type(project_ids)}")
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Union
|
|
4
|
+
|
|
5
|
+
from ...utils import (
|
|
6
|
+
OUTPUT_DIR,
|
|
7
|
+
current_timestamp,
|
|
8
|
+
deep_serialize,
|
|
9
|
+
from_env,
|
|
10
|
+
get_output_filename,
|
|
11
|
+
write_json,
|
|
12
|
+
write_summary,
|
|
13
|
+
)
|
|
14
|
+
from .assets import StrategyAsset
|
|
15
|
+
from .client import StrategyClient, StrategyCredentials
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def iterate_all_data(
    client: StrategyClient,
) -> Iterable[tuple[str, Union[list, dict]]]:
    """
    Walks through every Strategy asset type and yields, for each one,
    its lowercase name together with the serialized extracted rows.
    """
    for asset in StrategyAsset:
        logger.info(f"Extracting {asset.value.upper()} from REST API")

        raw_rows = client.fetch(asset)
        key = asset.name.lower()
        rows = list(deep_serialize(raw_rows))

        yield key, rows
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def extract_all(**kwargs) -> None:
    """
    Extracts all Strategy assets and writes one JSON file per asset type,
    followed by a summary, into the output directory.

    The output directory comes from the `output` keyword argument or,
    when absent, from the environment. Remaining keyword arguments are
    used to build the credentials.
    """
    _output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
    credentials = StrategyCredentials(**kwargs)

    client = StrategyClient(credentials=credentials)
    ts = current_timestamp()

    # always release the connection, even when extraction or writing fails
    try:
        for key, data in iterate_all_data(client):
            filename = get_output_filename(key, _output_directory, ts)
            write_json(filename, data)
    finally:
        client.close()

    # only reached when every asset was written successfully
    write_summary(_output_directory, ts)
|
|
@@ -1,13 +1,27 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/*
|
|
2
|
+
This query was inspired by this script:
|
|
3
|
+
https://github.com/awslabs/amazon-redshift-utils/blob/master/src/AdminViews/v_generate_view_ddl.sql
|
|
4
|
+
|
|
5
|
+
Notable differences:
|
|
6
|
+
* There is no "--DROP" statement/comment here
|
|
7
|
+
* Left-trimming the view definition is necessary to capture "CREATE" statements starting with whitespaces or line breaks.
|
|
8
|
+
*/
|
|
9
|
+
|
|
3
10
|
SELECT
|
|
4
11
|
CURRENT_DATABASE() AS database_name,
|
|
5
12
|
n.nspname AS schema_name,
|
|
6
13
|
c.relname AS view_name,
|
|
7
14
|
CASE
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
15
|
+
WHEN c.relnatts > 0 THEN
|
|
16
|
+
CASE
|
|
17
|
+
STRPOS(LOWER(LTRIM(pg_get_viewdef(c.oid, TRUE), '\t\r\n ')), 'create')
|
|
18
|
+
WHEN 1 THEN '' -- CREATE statement already present
|
|
19
|
+
ELSE -- No CREATE statement present, so no materialized view anyway
|
|
20
|
+
'CREATE OR REPLACE VIEW ' || QUOTE_IDENT(n.nspname) || '.' || QUOTE_IDENT(c.relname) || ' AS\n'
|
|
21
|
+
END
|
|
22
|
+
|| COALESCE(pg_get_viewdef(c.oid, TRUE), '')
|
|
23
|
+
ELSE COALESCE(pg_get_viewdef(c.oid, TRUE), '')
|
|
24
|
+
END AS view_definition
|
|
11
25
|
FROM
|
|
12
26
|
pg_catalog.pg_class AS c
|
|
13
27
|
INNER JOIN
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.
|
|
3
|
+
Version: 0.24.13
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -28,6 +28,7 @@ Provides-Extra: qlik
|
|
|
28
28
|
Provides-Extra: redshift
|
|
29
29
|
Provides-Extra: snowflake
|
|
30
30
|
Provides-Extra: sqlserver
|
|
31
|
+
Provides-Extra: strategy
|
|
31
32
|
Provides-Extra: tableau
|
|
32
33
|
Requires-Dist: cryptography (>=43.0.0,<44.0.0) ; extra == "snowflake"
|
|
33
34
|
Requires-Dist: databricks-sql-connector (==3.6.0) ; extra == "databricks" or extra == "all"
|
|
@@ -35,11 +36,12 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
|
|
|
35
36
|
Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
|
|
36
37
|
Requires-Dist: google-auth (>=2,<3)
|
|
37
38
|
Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
|
|
38
|
-
Requires-Dist: google-cloud-storage (>=3
|
|
39
|
+
Requires-Dist: google-cloud-storage (>=2,<3)
|
|
39
40
|
Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
|
|
40
41
|
Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
|
|
41
42
|
Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
|
|
42
43
|
Requires-Dist: msal (>=1.20.0,<2.0.0) ; extra == "powerbi" or extra == "all"
|
|
44
|
+
Requires-Dist: mstrio-py (>=11.5.3.101,<12.0.0.0) ; (python_version >= "3.10") and (extra == "strategy" or extra == "all")
|
|
43
45
|
Requires-Dist: numpy (<2) ; extra == "bigquery" or extra == "databricks" or extra == "all"
|
|
44
46
|
Requires-Dist: numpy (>=1.26) ; (python_version >= "3.12" and python_version < "3.13") and (extra == "bigquery" or extra == "databricks" or extra == "all")
|
|
45
47
|
Requires-Dist: pandas (>=2.1) ; (python_version >= "3.12" and python_version < "3.13") and (extra == "databricks" or extra == "all")
|
|
@@ -103,6 +105,8 @@ It also embeds utilities to help you push your metadata to Castor:
|
|
|
103
105
|
Requirements: **python3.9+**
|
|
104
106
|
<img src="https://upload.wikimedia.org/wikipedia/commons/c/c3/Python-logo-notext.svg" width=20 />
|
|
105
107
|
|
|
108
|
+
**Note:** The Strategy command requires **python3.10+**. All other modules work with python3.9+.
|
|
109
|
+
|
|
106
110
|
### Create castor-env
|
|
107
111
|
|
|
108
112
|
We advise to create a dedicated [Python environment](https://docs.python.org/3/library/venv.html).
|
|
@@ -181,6 +185,7 @@ pip install castor-extractor[postgres]
|
|
|
181
185
|
pip install castor-extractor[redshift]
|
|
182
186
|
pip install castor-extractor[snowflake]
|
|
183
187
|
pip install castor-extractor[sqlserver]
|
|
188
|
+
pip install castor-extractor[strategy]
|
|
184
189
|
pip install castor-extractor[tableau]
|
|
185
190
|
```
|
|
186
191
|
|
|
@@ -204,12 +209,24 @@ export CASTOR_OUTPUT_DIRECTORY="/tmp/castor"
|
|
|
204
209
|
|
|
205
210
|
## Contact
|
|
206
211
|
|
|
207
|
-
For any questions or bug report, contact us at [support@
|
|
212
|
+
For any questions or bug report, contact us at [support@coalesce.io](mailto:support@coalesce.io)
|
|
208
213
|
|
|
209
|
-
[
|
|
214
|
+
[Catalog from Coalesce](https://castordoc.com) helps you find, understand, use your data assets
|
|
210
215
|
|
|
211
216
|
# Changelog
|
|
212
217
|
|
|
218
|
+
## 0.24.13 - 2025-05-05
|
|
219
|
+
|
|
220
|
+
* Rollback cloud-storage version as it's not compatible with Keboola
|
|
221
|
+
|
|
222
|
+
## 0.24.12 - 2025-05-05
|
|
223
|
+
|
|
224
|
+
* Redshift - fix query definition of materialized views
|
|
225
|
+
|
|
226
|
+
## 0.24.11 - 2025-05-05
|
|
227
|
+
|
|
228
|
+
* add support for Strategy (formerly MicroStrategy)
|
|
229
|
+
|
|
213
230
|
## 0.24.10 - 2025-04-30
|
|
214
231
|
|
|
215
232
|
* Tableau - skip warnings instead of raising an error
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=u3BUqDPLtQ7K1RytlxEmtcSy4kJG1qhPtmrimODTYZU,17156
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
5
|
-
README.md,sha256=
|
|
5
|
+
README.md,sha256=C6hTyZO60T7z7xwHbspHlii384Jn02k0Rycxu3bCX0o,3866
|
|
6
6
|
castor_extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
castor_extractor/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
castor_extractor/commands/extract_bigquery.py,sha256=dU4OiYO1V0n32orvZnMh1_xtFKF_VxHNXcVsH3otY-g,1269
|
|
@@ -25,6 +25,7 @@ castor_extractor/commands/extract_salesforce_reporting.py,sha256=FdANTNiLkIPdm80
|
|
|
25
25
|
castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_9bYjUMg7BOW-k,643
|
|
26
26
|
castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
|
|
27
27
|
castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
|
|
28
|
+
castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t774-XOpjdCFF1dYo,821
|
|
28
29
|
castor_extractor/commands/extract_tableau.py,sha256=xXlLKLN8Eu_a8Kt2F4E-C5D-gq8SUmvoxJcdR_thKKY,1365
|
|
29
30
|
castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
|
|
30
31
|
castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
|
|
@@ -272,6 +273,12 @@ castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ7
|
|
|
272
273
|
castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
|
|
273
274
|
castor_extractor/visualization/sigma/client/pagination.py,sha256=kNEhNq08tTGbypyMjxs0w4uvDtQc_iaWpOZweaa_FsU,690
|
|
274
275
|
castor_extractor/visualization/sigma/extract.py,sha256=XIT1qsj6g6dgBWP8HPfj_medZexu48EaY9tUwi14gzM,2298
|
|
276
|
+
castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
|
|
277
|
+
castor_extractor/visualization/strategy/assets.py,sha256=tqB3GOtp-r7IOnYO8UxZgrldoSMImJnv5KeIwDFxg68,302
|
|
278
|
+
castor_extractor/visualization/strategy/client/__init__.py,sha256=XWP0yF5j6JefDJkDfX-RSJn3HF2ceQ0Yx1PLCfB3BBo,80
|
|
279
|
+
castor_extractor/visualization/strategy/client/client.py,sha256=F7taX0jSQpM8R3GOGeUQ7U_bJKkoHTwAc9oyc3ZDxbM,10261
|
|
280
|
+
castor_extractor/visualization/strategy/client/credentials.py,sha256=urFfNxWX1JG6wwFMYImufQzHa5g-sgjdlVGzi63owwg,1113
|
|
281
|
+
castor_extractor/visualization/strategy/extract.py,sha256=2fBuvS2xiOGXRpxXnZsE_C3en6t1-BlM5TbusjHyEkg,1166
|
|
275
282
|
castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
|
|
276
283
|
castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
|
|
277
284
|
castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
|
|
@@ -375,7 +382,7 @@ castor_extractor/warehouse/redshift/queries/schema.sql,sha256=Mf6nooi2w2PhGxM2_k
|
|
|
375
382
|
castor_extractor/warehouse/redshift/queries/table.sql,sha256=y8CGOwPHH_Mr8g1Zvuz2U5ldL8zuPm5v3M5RPZqIhsE,2645
|
|
376
383
|
castor_extractor/warehouse/redshift/queries/table_freshness.sql,sha256=l61_ysmTEtuMwK9RmYmD5cu0HmD1RXwTEhX0ytBeyxg,726
|
|
377
384
|
castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6WfwsX6VavoMb2VqYA32f6Dt-_Y,170
|
|
378
|
-
castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=
|
|
385
|
+
castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=VxrZ6VFHQy46AoIuLTwb2DZ8CWbUM9JLzyfp5jc3m6E,1232
|
|
379
386
|
castor_extractor/warehouse/redshift/query.py,sha256=hQaBHj0OvoEQ_HehU-vPyd5JH7YgndbzVi9-pyA5k_U,1054
|
|
380
387
|
castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
|
|
381
388
|
castor_extractor/warehouse/salesforce/client.py,sha256=067ZyccmIYoY6VwLTSneefOJqUpobtnoEzxJMY2oSPs,3268
|
|
@@ -416,8 +423,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
416
423
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
417
424
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
418
425
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
419
|
-
castor_extractor-0.24.
|
|
420
|
-
castor_extractor-0.24.
|
|
421
|
-
castor_extractor-0.24.
|
|
422
|
-
castor_extractor-0.24.
|
|
423
|
-
castor_extractor-0.24.
|
|
426
|
+
castor_extractor-0.24.13.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
427
|
+
castor_extractor-0.24.13.dist-info/METADATA,sha256=TBjkOLzvfC6wrL6Myv0N7IkcNXzkQDTqdd8QdbNpbxU,24609
|
|
428
|
+
castor_extractor-0.24.13.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
429
|
+
castor_extractor-0.24.13.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
430
|
+
castor_extractor-0.24.13.dist-info/RECORD,,
|
|
@@ -19,6 +19,7 @@ castor-extract-salesforce-viz=castor_extractor.commands.extract_salesforce_repor
|
|
|
19
19
|
castor-extract-sigma=castor_extractor.commands.extract_sigma:main
|
|
20
20
|
castor-extract-snowflake=castor_extractor.commands.extract_snowflake:main
|
|
21
21
|
castor-extract-sqlserver=castor_extractor.commands.extract_sqlserver:main
|
|
22
|
+
castor-extract-strategy=castor_extractor.commands.extract_strategy:main
|
|
22
23
|
castor-extract-tableau=castor_extractor.commands.extract_tableau:main
|
|
23
24
|
castor-extract-thoughtspot=castor_extractor.commands.extract_thoughtspot:main
|
|
24
25
|
castor-file-check=castor_extractor.commands.file_check:main
|
|
File without changes
|
|
File without changes
|