castor-extractor 0.24.1__py3-none-any.whl → 0.24.4__py3-none-any.whl

This diff shows the content of publicly released versions of the package, as they appear in their public registries. It is provided for informational purposes only and reflects the changes between the two versions.


Files changed (31)
  1. CHANGELOG.md +16 -0
  2. castor_extractor/commands/extract_tableau.py +2 -2
  3. castor_extractor/visualization/tableau/__init__.py +3 -0
  4. castor_extractor/visualization/{tableau_revamp → tableau}/assets.py +4 -4
  5. castor_extractor/visualization/tableau/client/__init__.py +2 -0
  6. castor_extractor/visualization/{tableau_revamp → tableau}/client/client.py +15 -15
  7. castor_extractor/visualization/{tableau_revamp → tableau}/client/client_metadata_api.py +9 -9
  8. castor_extractor/visualization/{tableau_revamp → tableau}/client/client_rest_api.py +6 -6
  9. castor_extractor/visualization/{tableau_revamp → tableau}/client/client_tsc.py +9 -9
  10. castor_extractor/visualization/{tableau_revamp → tableau}/client/credentials.py +2 -2
  11. castor_extractor/visualization/{tableau_revamp → tableau}/client/gql_queries.py +8 -8
  12. castor_extractor/visualization/{tableau_revamp → tableau}/client/rest_fields.py +10 -10
  13. castor_extractor/visualization/{tableau_revamp → tableau}/extract.py +7 -7
  14. castor_extractor/visualization/thoughtspot/client/client.py +54 -17
  15. castor_extractor/visualization/thoughtspot/client/endpoints.py +2 -2
  16. castor_extractor/visualization/thoughtspot/client/pagination.py +25 -0
  17. castor_extractor/warehouse/snowflake/queries/column.sql +3 -1
  18. castor_extractor/warehouse/snowflake/queries/query.sql +19 -11
  19. castor_extractor/warehouse/snowflake/queries/schema.sql +1 -0
  20. castor_extractor/warehouse/snowflake/queries/table.sql +2 -2
  21. {castor_extractor-0.24.1.dist-info → castor_extractor-0.24.4.dist-info}/METADATA +17 -1
  22. {castor_extractor-0.24.1.dist-info → castor_extractor-0.24.4.dist-info}/RECORD +27 -28
  23. castor_extractor/visualization/tableau_revamp/__init__.py +0 -3
  24. castor_extractor/visualization/tableau_revamp/client/__init__.py +0 -2
  25. castor_extractor/visualization/thoughtspot/client/utils.py +0 -31
  26. castor_extractor/visualization/thoughtspot/client/utils_test.py +0 -75
  27. /castor_extractor/visualization/{tableau_revamp → tableau}/client/errors.py +0 -0
  28. /castor_extractor/visualization/{tableau_revamp → tableau}/constants.py +0 -0
  29. {castor_extractor-0.24.1.dist-info → castor_extractor-0.24.4.dist-info}/LICENCE +0 -0
  30. {castor_extractor-0.24.1.dist-info → castor_extractor-0.24.4.dist-info}/WHEEL +0 -0
  31. {castor_extractor-0.24.1.dist-info → castor_extractor-0.24.4.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
  # Changelog

+ ## 0.24.4 - 2025-03-19
+
+ * Snowflake:
+   * improve the list of ignored queries in the query history extraction
+   * ignore the following query types : CALL, COMMENT, EXPLAIN, REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION, REVOKE, TRUNCATE_TABLE, UNDROP
+   * ignore queries with empty text
+   * filter out schemas with empty names
+
+ ## 0.24.3 - 2025-03-18
+
+ * Replace ThoughtSpot endpoint `/api/rest/2.0/report/liveboard` with `/api/rest/2.0/metadata/liveboard/data` following the deprecation of the CSV option
+
+ ## 0.24.2 - 2025-03-17
+
+ * Rename Revamped Tableau Connector classes
+
  ## 0.24.1 - 2025-03-14

  * Added support for Looker Studio
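
The 0.24.3 entry swaps the deprecated CSV report endpoint for the JSON data endpoint. As a standalone illustration, here is a minimal sketch of the new call shape in Python; the host, token, and liveboard id are placeholders, and the payload keys mirror the `thoughtspot/client/client.py` hunks further down:

```python
import requests

HOST = "https://example.thoughtspot.cloud"  # placeholder host
TOKEN = "<bearer-token>"  # placeholder token

response = requests.post(
    f"{HOST}/api/rest/2.0/metadata/liveboard/data",
    headers={"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"},
    json={
        "metadata_identifier": "<liveboard-id>",  # placeholder liveboard id
        "visualization_identifiers": ["Answer Usage, by User"],
        "record_offset": 0,
        "record_size": 100,
    },
)
response.raise_for_status()
# the new endpoint returns JSON; the extractor reads the first entry of "contents"
page = response.json()["contents"][0]
```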

castor_extractor/commands/extract_tableau.py CHANGED
@@ -2,7 +2,7 @@ import logging
  from argparse import ArgumentParser

  from castor_extractor.utils import parse_filled_arguments # type: ignore
- from castor_extractor.visualization import tableau_revamp # type: ignore
+ from castor_extractor.visualization import tableau # type: ignore

  logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")

@@ -36,4 +36,4 @@ def main():

      parser.add_argument("-o", "--output", help="Directory to write to")

-     tableau_revamp.extract_all(**parse_filled_arguments(parser))
+     tableau.extract_all(**parse_filled_arguments(parser))

castor_extractor/visualization/tableau/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .assets import TableauAsset
+ from .client import TableauClient, TableauCredentials
+ from .extract import extract_all, iterate_all_data

castor_extractor/visualization/{tableau_revamp → tableau}/assets.py RENAMED
@@ -1,7 +1,7 @@
  from ...types import ExternalAsset


- class TableauRevampAsset(ExternalAsset):
+ class TableauAsset(ExternalAsset):
      """
      Tableau assets
      """
@@ -23,7 +23,7 @@ class TableauRevampAsset(ExternalAsset):

  # assets that are only available for clients using Tableau Pulse
  TABLEAU_PULSE_ASSETS = (
-     TableauRevampAsset.METRIC,
-     TableauRevampAsset.METRIC_DEFINITION,
-     TableauRevampAsset.SUBSCRIPTION,
+     TableauAsset.METRIC,
+     TableauAsset.METRIC_DEFINITION,
+     TableauAsset.SUBSCRIPTION,
  )

castor_extractor/visualization/tableau/client/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .client import TableauClient
+ from .credentials import TableauCredentials

castor_extractor/visualization/{tableau_revamp → tableau}/client/client.py RENAMED
@@ -4,31 +4,31 @@ from typing import Optional
  import tableauserverclient as TSC # type: ignore

  from ....utils import SerializedAsset
- from ..assets import TABLEAU_PULSE_ASSETS, TableauRevampAsset
+ from ..assets import TABLEAU_PULSE_ASSETS, TableauAsset
  from ..constants import CREDENTIALS_SITE_ID_KEY, DEFAULT_TIMEOUT_SECONDS
  from .client_metadata_api import TableauClientMetadataApi
  from .client_rest_api import TableauClientRestApi
  from .client_tsc import TableauClientTSC
- from .credentials import TableauRevampCredentials
+ from .credentials import TableauCredentials

  logger = logging.getLogger(__name__)

  # these assets must be extracted via TableauServerClient (TSC)
  _TSC_ASSETS = (
      # projects are not available in Metadata API
-     TableauRevampAsset.PROJECT,
+     TableauAsset.PROJECT,
      # view count are not available in Metadata API
-     TableauRevampAsset.USAGE,
+     TableauAsset.USAGE,
      # only users who published content can be extracted from MetadataAPI
-     TableauRevampAsset.USER,
+     TableauAsset.USER,
  )

  # these assets must be extracted via the REST API
  _REST_API_ASSETS = (
      # Tableau Pulse assets are only available in REST API
-     TableauRevampAsset.METRIC,
-     TableauRevampAsset.METRIC_DEFINITION,
-     TableauRevampAsset.SUBSCRIPTION,
+     TableauAsset.METRIC,
+     TableauAsset.METRIC_DEFINITION,
+     TableauAsset.SUBSCRIPTION,
  )

  logging.getLogger("tableau.endpoint").setLevel(logging.WARNING)
@@ -109,7 +109,7 @@ def _server(
      return server


- class TableauRevampClient:
+ class TableauClient:
      """
      Connect to Tableau's API and extract assets.

@@ -119,7 +119,7 @@ class TableauRevampClient:

      def __init__(
          self,
-         credentials: TableauRevampCredentials,
+         credentials: TableauCredentials,
          timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
          with_pulse: bool = False,
          override_page_size: Optional[int] = None,
@@ -186,7 +186,7 @@ class TableauRevampClient:
          )

      def _fetch_datasources(self) -> SerializedAsset:
-         asset = TableauRevampAsset.DATASOURCE
+         asset = TableauAsset.DATASOURCE

          datasources = self._client_metadata.fetch(asset)
          tsc_datasources = self._client_tsc.fetch(asset)
@@ -194,7 +194,7 @@ class TableauRevampClient:
          return _merge_datasources(datasources, tsc_datasources)

      def _fetch_workbooks(self) -> SerializedAsset:
-         asset = TableauRevampAsset.WORKBOOK
+         asset = TableauAsset.WORKBOOK

          site_id = self._credentials.site_id
          workbooks = self._client_metadata.fetch(asset)
@@ -206,7 +206,7 @@ class TableauRevampClient:

      def fetch(
          self,
-         asset: TableauRevampAsset,
+         asset: TableauAsset,
      ) -> SerializedAsset:
          """
          Extract the given Tableau Asset
@@ -217,11 +217,11 @@ class TableauRevampClient:

          logger.info(f"Extracting {asset.name}...")

-         if asset == TableauRevampAsset.DATASOURCE:
+         if asset == TableauAsset.DATASOURCE:
              # two APIs are required to extract datasources
              return self._fetch_datasources()

-         if asset == TableauRevampAsset.WORKBOOK:
+         if asset == TableauAsset.WORKBOOK:
              # two APIs are required to extract workbooks
              return self._fetch_workbooks()


castor_extractor/visualization/{tableau_revamp → tableau}/client/client_metadata_api.py RENAMED
@@ -4,21 +4,21 @@ from typing import Optional
  import tableauserverclient as TSC # type: ignore

  from ....utils import SerializedAsset, retry
- from ..assets import TableauRevampAsset
+ from ..assets import TableauAsset
  from ..constants import DEFAULT_PAGE_SIZE
  from .errors import TableauApiError, TableauApiTimeout
  from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE

  # increase the value when extraction is too slow
  # decrease the value when timeouts arise
- _CUSTOM_PAGE_SIZE: dict[TableauRevampAsset, int] = {
+ _CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
      # for some clients, extraction of columns tend to hit the node limit
      # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
      # the workaround is to reduce pagination
-     TableauRevampAsset.COLUMN: 50,
+     TableauAsset.COLUMN: 50,
      # fields are light but volumes are bigger
-     TableauRevampAsset.FIELD: 1000,
-     TableauRevampAsset.TABLE: 50,
+     TableauAsset.FIELD: 1000,
+     TableauAsset.TABLE: 50,
  }

  _TIMEOUT_MESSAGE = (
@@ -115,7 +115,7 @@ class TableauClientMetadataApi:
          result_pages = gql_query_scroll(self._server, query, resource)
          return [asset for page in result_pages for asset in page]

-     def _page_size(self, asset: TableauRevampAsset) -> int:
+     def _page_size(self, asset: TableauAsset) -> int:
          return (
              self._override_page_size
              or _CUSTOM_PAGE_SIZE.get(asset)
@@ -124,7 +124,7 @@ class TableauClientMetadataApi:

      def _fetch_fields(self) -> SerializedAsset:
          result: SerializedAsset = []
-         page_size = self._page_size(TableauRevampAsset.FIELD)
+         page_size = self._page_size(TableauAsset.FIELD)
          for resource, fields in FIELDS_QUERIES:
              current = self._call(resource, fields, page_size)
              result.extend(current)
@@ -132,9 +132,9 @@ class TableauClientMetadataApi:

      def fetch(
          self,
-         asset: TableauRevampAsset,
+         asset: TableauAsset,
      ) -> SerializedAsset:
-         if asset == TableauRevampAsset.FIELD:
+         if asset == TableauAsset.FIELD:
              return self._fetch_fields()

          page_size = self._page_size(asset)

castor_extractor/visualization/{tableau_revamp → tableau}/client/client_rest_api.py RENAMED
@@ -5,7 +5,7 @@ import requests
  import tableauserverclient as TSC # type: ignore

  from ....utils import SerializedAsset, deduplicate
- from ..assets import TableauRevampAsset
+ from ..assets import TableauAsset
  from .rest_fields import REST_FIELDS

  logger = logging.getLogger(__name__)
@@ -17,7 +17,7 @@ _METRICS_DEFINITION_URL = "{base}/pulse/site/{site}/{definition_id}"

  def _pick(
      data: SerializedAsset,
-     asset: TableauRevampAsset,
+     asset: TableauAsset,
  ) -> SerializedAsset:
      keys = REST_FIELDS[asset]
      return [{key: row[key] for key in keys} for row in data]
@@ -107,18 +107,18 @@ class TableauClientRestApi:

      def fetch(
          self,
-         asset: TableauRevampAsset,
+         asset: TableauAsset,
      ) -> SerializedAsset:
-         if asset == TableauRevampAsset.SUBSCRIPTION:
+         if asset == TableauAsset.SUBSCRIPTION:
              # https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_pulse.htm#PulseSubscriptionService_ListSubscriptions
              data = self._call(path=_PULSE_API, target="subscriptions")

-         elif asset == TableauRevampAsset.METRIC_DEFINITION:
+         elif asset == TableauAsset.METRIC_DEFINITION:
              # https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_pulse.htm#MetricQueryService_ListDefinitions
              data = self._call(path=_PULSE_API, target="definitions")
              self._compute_metric_url(data)

-         elif asset == TableauRevampAsset.METRIC:
+         elif asset == TableauAsset.METRIC:
              # https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_pulse.htm#MetricQueryService_ListMetrics
              definitions = self._call(path=_PULSE_API, target="definitions")
              data = self._fetch_metrics(definitions)

castor_extractor/visualization/{tableau_revamp → tableau}/client/client_tsc.py RENAMED
@@ -4,7 +4,7 @@ from typing import Any
  import tableauserverclient as TSC # type: ignore

  from ....utils import JsonType, SerializedAsset
- from ..assets import TableauRevampAsset
+ from ..assets import TableauAsset
  from .rest_fields import REST_FIELDS


@@ -30,13 +30,13 @@ class TableauClientTSC:
      def _pick_fields(
          self,
          data: Iterable,
-         asset: TableauRevampAsset,
+         asset: TableauAsset,
      ) -> Iterator[dict]:
          keys = REST_FIELDS[asset]

          for row in data:
              fields = {key: _pick(row, key) for key in keys}
-             if asset == TableauRevampAsset.USER:
+             if asset == TableauAsset.USER:
                  self._server.users.populate_groups(row)
                  fields["group_ids"] = [group.id for group in row.groups]

@@ -44,21 +44,21 @@ class TableauClientTSC:

      def fetch(
          self,
-         asset: TableauRevampAsset,
+         asset: TableauAsset,
      ) -> SerializedAsset:
-         if asset == TableauRevampAsset.DATASOURCE:
+         if asset == TableauAsset.DATASOURCE:
              data = TSC.Pager(self._server.datasources)

-         elif asset == TableauRevampAsset.PROJECT:
+         elif asset == TableauAsset.PROJECT:
              data = TSC.Pager(self._server.projects)

-         elif asset == TableauRevampAsset.USAGE:
+         elif asset == TableauAsset.USAGE:
              data = TSC.Pager(self._server.views, usage=True)

-         elif asset == TableauRevampAsset.USER:
+         elif asset == TableauAsset.USER:
              data = TSC.Pager(self._server.users)

-         elif asset == TableauRevampAsset.WORKBOOK:
+         elif asset == TableauAsset.WORKBOOK:
              data = TSC.Pager(self._server.workbooks)

          else:
@@ -15,7 +15,7 @@ _DEFAULT_SITE_ID_USER_INPUT = "default"
15
15
  TABLEAU_ENV_PREFIX = "CASTOR_TABLEAU_"
16
16
 
17
17
 
18
- class TableauRevampCredentials(BaseSettings):
18
+ class TableauCredentials(BaseSettings):
19
19
  """
20
20
  Tableau's credentials to connect to both APIs (REST and GRAPHQL)
21
21
  """
@@ -42,7 +42,7 @@ class TableauRevampCredentials(BaseSettings):
42
42
  return site_id
43
43
 
44
44
  @model_validator(mode="after")
45
- def _check_user_xor_pat_login(self) -> "TableauRevampCredentials":
45
+ def _check_user_xor_pat_login(self) -> "TableauCredentials":
46
46
  """
47
47
  Checks that credentials are correctly input, it means either:
48
48
  - User and password are filled

castor_extractor/visualization/{tableau_revamp → tableau}/client/gql_queries.py RENAMED
@@ -1,4 +1,4 @@
- from ..assets import TableauRevampAsset
+ from ..assets import TableauAsset

  QUERY_TEMPLATE = """
  {{
@@ -128,13 +128,13 @@ workbook { id }
  """


- GQL_QUERIES: dict[TableauRevampAsset, tuple[str, str]] = {
-     TableauRevampAsset.COLUMN: ("columns", _COLUMNS_QUERY),
-     TableauRevampAsset.DASHBOARD: ("dashboards", _DASHBOARDS_QUERY),
-     TableauRevampAsset.DATASOURCE: ("datasources", _DATASOURCES_QUERY),
-     TableauRevampAsset.SHEET: ("sheets", _SHEETS_QUERY),
-     TableauRevampAsset.TABLE: ("tables", _TABLES_QUERY),
-     TableauRevampAsset.WORKBOOK: ("workbooks", _WORKBOOKS_QUERY),
+ GQL_QUERIES: dict[TableauAsset, tuple[str, str]] = {
+     TableauAsset.COLUMN: ("columns", _COLUMNS_QUERY),
+     TableauAsset.DASHBOARD: ("dashboards", _DASHBOARDS_QUERY),
+     TableauAsset.DATASOURCE: ("datasources", _DATASOURCES_QUERY),
+     TableauAsset.SHEET: ("sheets", _SHEETS_QUERY),
+     TableauAsset.TABLE: ("tables", _TABLES_QUERY),
+     TableauAsset.WORKBOOK: ("workbooks", _WORKBOOKS_QUERY),
  }

  FIELDS_QUERIES = (

castor_extractor/visualization/{tableau_revamp → tableau}/client/rest_fields.py RENAMED
@@ -1,44 +1,44 @@
- from ..assets import TableauRevampAsset
+ from ..assets import TableauAsset

  # list of fields to pick in REST API or TSC responses
- REST_FIELDS: dict[TableauRevampAsset, set[str]] = {
-     TableauRevampAsset.DATASOURCE: {
+ REST_FIELDS: dict[TableauAsset, set[str]] = {
+     TableauAsset.DATASOURCE: {
          "id",
          "project_id",
          "webpage_url",
      },
-     TableauRevampAsset.METRIC: {
+     TableauAsset.METRIC: {
          "id",
          "definition_id",
      },
-     TableauRevampAsset.METRIC_DEFINITION: {
+     TableauAsset.METRIC_DEFINITION: {
          "metadata",
          "specification",
      },
-     TableauRevampAsset.PROJECT: {
+     TableauAsset.PROJECT: {
          "description",
          "id",
          "name",
          "parent_id",
      },
-     TableauRevampAsset.SUBSCRIPTION: {
+     TableauAsset.SUBSCRIPTION: {
          "follower",
          "id",
          "metric_id",
      },
-     TableauRevampAsset.USAGE: {
+     TableauAsset.USAGE: {
          "name",
          "total_views",
          "workbook_id",
      },
-     TableauRevampAsset.USER: {
+     TableauAsset.USER: {
          "email",
          "fullname",
          "id",
          "name",
          "site_role",
      },
-     TableauRevampAsset.WORKBOOK: {
+     TableauAsset.WORKBOOK: {
          "id",
          "project_id",
      },

castor_extractor/visualization/{tableau_revamp → tableau}/extract.py RENAMED
@@ -10,18 +10,18 @@ from ...utils import (
      write_json,
      write_summary,
  )
- from .assets import TableauRevampAsset
- from .client import TableauRevampClient, TableauRevampCredentials
+ from .assets import TableauAsset
+ from .client import TableauClient, TableauCredentials

  logger = logging.getLogger(__name__)


  def iterate_all_data(
-     client: TableauRevampClient,
- ) -> Iterable[tuple[TableauRevampAsset, list]]:
+     client: TableauClient,
+ ) -> Iterable[tuple[TableauAsset, list]]:
      """Iterate over the extracted Data from Tableau"""

-     for asset in TableauRevampAsset:
+     for asset in TableauAsset:
          data = client.fetch(asset)
          yield asset, deep_serialize(data)

@@ -36,8 +36,8 @@ def extract_all(**kwargs) -> None:
      page_size = kwargs.get("page_size")
      timestamp = current_timestamp()

-     credentials = TableauRevampCredentials(**kwargs)
-     client = TableauRevampClient(
+     credentials = TableauCredentials(**kwargs)
+     client = TableauClient(
          credentials,
          with_pulse=with_pulse,
          override_page_size=page_size,

castor_extractor/visualization/thoughtspot/client/client.py CHANGED
@@ -1,13 +1,17 @@
+ import logging
  from collections.abc import Iterator
- from typing import Optional
+ from functools import partial
+ from typing import Iterable, Optional

  import requests
+ from requests import Response

  from ....utils import (
      APIClient,
      BearerAuth,
      RequestSafeMode,
      build_url,
+     fetch_all_pages,
      handle_response,
  )
  from ..assets import (
@@ -19,9 +23,7 @@ from .credentials import (
  from .endpoints import (
      ThoughtspotEndpointFactory,
  )
- from .utils import (
-     usage_liveboard_reader,
- )
+ from .pagination import METADATA_BATCH_SIZE, ThoughtSpotPagination

  _AUTH_TIMEOUT_S = 60
  _THOUGHTSPOT_HEADERS = {
@@ -29,7 +31,6 @@ _THOUGHTSPOT_HEADERS = {
      "Accept": "application/json",
      "Content-Type": "application/json",
  }
- _METADATA_BATCH_SIZE = 100
  # https://docs.thoughtspot.com/cloud/latest/object-usage-liveboard
  _OBJECT_USAGE_LIVEBOARD = "Object Usage"
  _ANSWER_USAGE_VIZ = "Answer Usage, by User"
@@ -40,6 +41,9 @@ _LIVEBOARD_USAGE_VIZ = "Popular Liveboards Last 30 Days"
  THOUGHTSPOT_SAFE_MODE = RequestSafeMode()


+ logger = logging.getLogger(__name__)
+
+
  class ThoughtspotBearerAuth(BearerAuth):
      def __init__(self, host: str, token_payload: dict[str, str]):
          auth_endpoint = ThoughtspotEndpointFactory.authentication()
@@ -86,7 +90,7 @@ class ThoughtspotClient(APIClient):
          search_filters = {
              "metadata": [{"type": metadata_type}],
              "include_details": True,
-             "record_size": _METADATA_BATCH_SIZE,
+             "record_size": METADATA_BATCH_SIZE,
              "record_offset": offset,
          }
          if identifier:
@@ -100,9 +104,9 @@ class ThoughtspotClient(APIClient):
              data=search_filters,
          )
          yield from metadata
-         if len(metadata) < _METADATA_BATCH_SIZE:
+         if len(metadata) < METADATA_BATCH_SIZE:
              break
-         offset = offset + _METADATA_BATCH_SIZE
+         offset = offset + METADATA_BATCH_SIZE

      def _get_all_answers(self) -> Iterator[dict]:
          yield from self._metadata_search(metadata_type="ANSWER")
@@ -120,7 +124,7 @@ class ThoughtspotClient(APIClient):
          self,
          liveboard_name: str,
          visualization_name: str,
-     ) -> Iterator[dict]:
+     ) -> Iterator[list[list]]:
          """
          Yields the data of a given visualization in the given liveboard.
          ThoughtSpot maintains two system liveboards with stats about data usage,
@@ -133,29 +137,62 @@ class ThoughtspotClient(APIClient):
          )
          liveboard_id = usage_liveboard["metadata_id"]

-         data = self._post(
-             endpoint=ThoughtspotEndpointFactory.liveboard(),
-             headers={"Accept": "application/octet-stream"},
+         def handler(response: Response) -> dict:
+             response_dict = response.json()
+             contents = response_dict.get("contents", [])
+             if not contents:
+                 logger.warning("No data found in response")
+                 return dict()
+             return contents[0]
+
+         request = partial(
+             self._post,
+             endpoint=ThoughtspotEndpointFactory.liveboard_data(),
              data={
                  "metadata_identifier": liveboard_id,
-                 "file_format": "CSV",
                  "visualization_identifiers": [visualization_name],
+                 "record_offset": 0,
+                 "record_size": METADATA_BATCH_SIZE,
              },
-             handler=lambda x: x.text,
+             handler=handler,
          )
-         yield from usage_liveboard_reader(data)
+         yield from fetch_all_pages(request, ThoughtSpotPagination)

      def _get_answer_usages(self) -> Iterator[dict]:
-         return self._get_usages(
+         """
+         Returns the usage data of saved Answers, which is found in a visualization
+         of the "Object Usage" liveboard.
+         Each data row returned by the API is transformed from a list into a dictionary.
+         The columns are explicitly listed here because in the API response,
+         there is a mismatch between the number of column names and the number
+         of values per data row.
+         """
+         data: Iterable[list[list]] = self._get_usages(
              liveboard_name=_OBJECT_USAGE_LIVEBOARD,
              visualization_name=_ANSWER_USAGE_VIZ,
          )
+         columns = (
+             "Answer name",
+             "Number of unique users",
+             "Count of object interactions",
+         )
+         for row in data:
+             yield dict(zip(columns, row))

      def _get_liveboards_usages(self) -> Iterator[dict]:
-         return self._get_usages(
+         """
+         Returns the usage data of Liveboards, which is found in a visualization
+         of the "User Adoption" liveboard.
+         Each data row returned by the API is transformed from a list into a dictionary.
+         See `_get_answer_usages` regarding the columns list.
+         """
+         data: Iterable[list[list]] = self._get_usages(
              liveboard_name=_USER_ADOPTION_LIVEBOARD,
              visualization_name=_LIVEBOARD_USAGE_VIZ,
          )
+         columns = ("Pinboard", "Unique Number of User", "Pinboard Views")
+         for row in data:
+             yield dict(zip(columns, row))

      def fetch(self, asset: ThoughtspotAsset) -> Iterator[dict]:
          if asset == ThoughtspotAsset.ANSWERS:
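
The `_get_usages` refactor above replaces a one-shot CSV download with a paginated JSON fetch: `functools.partial` freezes the endpoint, payload, and response handler so the pagination helper can re-issue the same POST while only the offset changes. A rough sketch of that mechanism; the stand-in `post` function and the override behaviour relied upon by `fetch_all_pages` are assumptions, while `partial`'s keyword-override semantics are standard Python:

```python
from functools import partial

def post(endpoint: str, data: dict) -> dict:
    # stand-in for APIClient._post, echoing the payload back as a fake page
    return {"data_rows": [], **data}

request = partial(post, endpoint="demo", data={"record_offset": 0, "record_size": 100})
first = request()  # offset 0, as frozen in the partial
second = request(data={"record_offset": 100, "record_size": 100})  # keyword override
```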

castor_extractor/visualization/thoughtspot/client/endpoints.py CHANGED
@@ -8,5 +8,5 @@ class ThoughtspotEndpointFactory:
          return "api/rest/2.0/metadata/search"

      @classmethod
-     def liveboard(cls) -> str:
-         return "api/rest/2.0/report/liveboard"
+     def liveboard_data(cls) -> str:
+         return "api/rest/2.0/metadata/liveboard/data"

castor_extractor/visualization/thoughtspot/client/pagination.py ADDED
@@ -0,0 +1,25 @@
+ from pydantic import ConfigDict, Field
+
+ from ....utils import PaginationModel
+
+ METADATA_BATCH_SIZE = 100
+
+
+ class ThoughtSpotPagination(PaginationModel):
+     data_rows: list = Field(default_factory=list)
+     record_offset: int
+     record_size: int
+
+     model_config = ConfigDict(
+         populate_by_name=True,
+         from_attributes=True,
+     )
+
+     def is_last(self) -> bool:
+         return len(self.data_rows) < METADATA_BATCH_SIZE
+
+     def next_page_payload(self) -> dict:
+         return {"record_offset": self.record_offset + METADATA_BATCH_SIZE}
+
+     def page_results(self) -> list:
+         return self.data_rows
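
For context, a quick usage sketch of the model added above; the field values are illustrative, not taken from a real ThoughtSpot response, and `PaginationModel` is assumed to behave like a pydantic model:

```python
page = ThoughtSpotPagination(
    data_rows=[["Market Report", "19", "559"]],  # illustrative single row
    record_offset=0,
    record_size=100,
)
assert page.page_results() == [["Market Report", "19", "559"]]
assert page.is_last()  # fewer than METADATA_BATCH_SIZE rows: no next page
assert page.next_page_payload() == {"record_offset": 100}
```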

castor_extractor/warehouse/snowflake/queries/column.sql CHANGED
@@ -47,7 +47,9 @@ FROM snowflake.account_usage.columns AS c
  JOIN snowflake.account_usage.tables AS t ON t.table_id = c.table_id
  JOIN tags_agg_columns ta ON c.column_id = ta.column_id
  WHERE TRUE
- AND COALESCE(c.column_name, '') != ''
+ AND TRIM(COALESCE(c.column_name, '')) != ''
+ AND TRIM(COALESCE(t.table_name, '')) != ''
+ AND TRIM(COALESCE(s.schema_name, '')) != ''
  AND UPPER(c.table_catalog) NOT IN ('SNOWFLAKE', 'UTIL_DB')
  AND (
      c.deleted IS NULL

castor_extractor/warehouse/snowflake/queries/query.sql CHANGED
@@ -51,20 +51,28 @@ WHERE TRUE
  AND HOUR(CONVERT_TIMEZONE('UTC', start_time)) BETWEEN :hour_min AND :hour_max
  AND execution_status = 'SUCCESS'
  AND query_text != 'SELECT 1'
+ AND TRIM(COALESCE(query_text, '')) != ''
  AND query_type NOT IN (
-     'SHOW',
-     'USE',
-     'ROLLBACK',
-     'DESCRIBE',
      'ALTER_SESSION',
-     'PUT_FILES',
+     'BEGIN_TRANSACTION',
+     'CALL',
+     'COMMENT',
+     'COMMIT',
      'CREATE', -- create objects: stage|function|schema|procedure|file|storage|pipe|notification integration
-     'SET',
+     'DESCRIBE',
+     'DROP',
+     'EXPLAIN',
+     'GET_FILES',
      'GRANT',
-     'COMMIT',
+     'PUT_FILES',
+     'REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION',
+     'REMOVE_FILES',
+     'REVOKE',
+     'ROLLBACK',
+     'SET',
+     'SHOW',
+     'TRUNCATE_TABLE',
+     'UNDROP',
      'UNLOAD',
-     'GET_FILES',
-     'DROP',
-     'BEGIN_TRANSACTION',
-     'REMOVE_FILES'
+     'USE'
  )

castor_extractor/warehouse/snowflake/queries/schema.sql CHANGED
@@ -16,6 +16,7 @@ WHERE TRUE
      deleted IS NULL
      OR deleted > CURRENT_TIMESTAMP - INTERVAL '1 day'
  )
+ AND TRIM(COALESCE(schema_name, '')) != ''
  {database_allowed}
  {database_blocked}
  AND CASE {has_fetch_transient} WHEN FALSE THEN NOT s.is_transient::BOOLEAN ELSE TRUE END

castor_extractor/warehouse/snowflake/queries/table.sql CHANGED
@@ -41,8 +41,8 @@ FROM snowflake.account_usage.tables AS t
  JOIN snowflake.account_usage.schemata AS s ON s.schema_id = t.table_schema_id
  JOIN tags_agg_tables ta ON t.table_id = ta.table_id
  WHERE TRUE
- AND t.table_name IS NOT NULL
- AND t.table_name != ''
+ AND TRIM(COALESCE(t.table_name, '')) != ''
+ AND TRIM(COALESCE(s.schema_name, '')) != ''
  AND UPPER(t.table_catalog) NOT IN ('SNOWFLAKE', 'UTIL_DB')
  AND (
      t.deleted IS NULL

{castor_extractor-0.24.1.dist-info → castor_extractor-0.24.4.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: castor-extractor
- Version: 0.24.1
+ Version: 0.24.4
  Summary: Extract your metadata assets.
  Home-page: https://www.castordoc.com/
  License: EULA
@@ -210,6 +210,22 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su

  # Changelog

+ ## 0.24.4 - 2025-03-19
+
+ * Snowflake:
+   * improve the list of ignored queries in the query history extraction
+   * ignore the following query types : CALL, COMMENT, EXPLAIN, REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION, REVOKE, TRUNCATE_TABLE, UNDROP
+   * ignore queries with empty text
+   * filter out schemas with empty names
+
+ ## 0.24.3 - 2025-03-18
+
+ * Replace ThoughtSpot endpoint `/api/rest/2.0/report/liveboard` with `/api/rest/2.0/metadata/liveboard/data` following the deprecation of the CSV option
+
+ ## 0.24.2 - 2025-03-17
+
+ * Rename Revamped Tableau Connector classes
+
  ## 0.24.1 - 2025-03-14

  * Added support for Looker Studio

{castor_extractor-0.24.1.dist-info → castor_extractor-0.24.4.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
- CHANGELOG.md,sha256=uuXk7pDCrTLgVqMxD2SYWDXs3OzvaBfe592stOCjBdg,15815
+ CHANGELOG.md,sha256=1Y5FmmQDspwZaOhKjnJosP2sNd898LeTOmVIMTBt9Bw,16387
  Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -25,7 +25,7 @@ castor_extractor/commands/extract_salesforce_reporting.py,sha256=FdANTNiLkIPdm80
  castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_9bYjUMg7BOW-k,643
  castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
  castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
- castor_extractor/commands/extract_tableau.py,sha256=DGQaXS-61rV-uzBtqfvqtyQzjLtrLTzE1ViTXPG1eck,1379
+ castor_extractor/commands/extract_tableau.py,sha256=xXlLKLN8Eu_a8Kt2F4E-C5D-gq8SUmvoxJcdR_thKKY,1365
  castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
  castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
  castor_extractor/commands/upload.py,sha256=rLXp7gQ8zb1kLbho4FT87q8eJd8Gvo_TkyIynAaQ-4s,1342
@@ -260,27 +260,26 @@ castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ7
  castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
  castor_extractor/visualization/sigma/client/pagination.py,sha256=kNEhNq08tTGbypyMjxs0w4uvDtQc_iaWpOZweaa_FsU,690
  castor_extractor/visualization/sigma/extract.py,sha256=XIT1qsj6g6dgBWP8HPfj_medZexu48EaY9tUwi14gzM,2298
- castor_extractor/visualization/tableau_revamp/__init__.py,sha256=a3DGjQhaz17gBqW-E84TAgupKbqLC40y5Ajo1yn-ot4,156
- castor_extractor/visualization/tableau_revamp/assets.py,sha256=8sJsK6Qixao6xVmVaO1usvs16SjNub9sIx7o-adYV14,659
- castor_extractor/visualization/tableau_revamp/client/__init__.py,sha256=wmS9uLtUiqNYVloi0-DgD8d2qzu3RVZEAtWiaDp6G_M,90
- castor_extractor/visualization/tableau_revamp/client/client.py,sha256=Ju89lMDiLOZ2LjxylcFm5429WElxGxjc52bMIWoKCDA,7716
- castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py,sha256=WdALsMGTji2C5oSDyRwFzq-f5HZDwX-m3W8Byx87Qh4,4357
- castor_extractor/visualization/tableau_revamp/client/client_rest_api.py,sha256=O2F4qfrElTHHuD5WRPfLufazSmZ65jmlzye1t5rVOaQ,4024
- castor_extractor/visualization/tableau_revamp/client/client_tsc.py,sha256=AzN8ytKmq6HUeApTJ118JQ7EBEPESqrg7u8n3GZXqZI,1874
- castor_extractor/visualization/tableau_revamp/client/credentials.py,sha256=qA-EaX-4rbQRsn8v4zWh5Kh784ndHLjJaoZwnkQgCyo,1905
- castor_extractor/visualization/tableau_revamp/client/errors.py,sha256=ecT8Tit5VtzrOBB9ykblA0nvd75j5-_QDFupjV48zJQ,300
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=uKNGRhYeoiKfJ8vxO50L0a2fHDpYQgEdG_eZfYSdHqM,2238
- castor_extractor/visualization/tableau_revamp/client/rest_fields.py,sha256=3kvaq48BCBLfm7GL-i5W53MpbmSSi-e0yt31dNOk8ac,948
- castor_extractor/visualization/tableau_revamp/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
- castor_extractor/visualization/tableau_revamp/extract.py,sha256=Ud_lt1YDSyCBjR38sjtvINy_Ez_TnA_jtM2D-8LcxPA,1471
+ castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
+ castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
+ castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
+ castor_extractor/visualization/tableau/client/client.py,sha256=zzqhzIqKyJygo4ZNGk6cZh0e6Z9R1W5T0P9un52KC1M,7626
+ castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=fIBsSbRTypBABsCoigO2dkKsw4Eu3GrsEPTDfjY8A80,4303
+ castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
+ castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
+ castor_extractor/visualization/tableau/client/credentials.py,sha256=uQICIgeXmLZfOroTgZt7PuKNKTyqQllRGSTcOmIfrKU,1893
+ castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9ykblA0nvd75j5-_QDFupjV48zJQ,300
+ castor_extractor/visualization/tableau/client/gql_queries.py,sha256=NISarYh33Ij7DhYxqjTdv681AHYpbft8kPwVUQbAZ7U,2190
+ castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
+ castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
+ castor_extractor/visualization/tableau/extract.py,sha256=FnjmmUdNA9MEf3S5Tw37x6ZXxVsK8R3YnVk1UVYbaZk,1423
  castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
  castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
  castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
- castor_extractor/visualization/thoughtspot/client/client.py,sha256=mtwMCPI1-1tyZb1gSYYr-O2QZMTFQwNgillU6ycsOU4,5552
+ castor_extractor/visualization/thoughtspot/client/client.py,sha256=lRNkigPV2MTozgBzFkij7mCXMMRqXzPtNs8EEi_f3tk,7127
  castor_extractor/visualization/thoughtspot/client/credentials.py,sha256=fp4YHiZy-dstWiLr5c4kFU9SyPK5rd2nCeh8k5sVRpM,462
- castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=u3FRkmG6j5OIMEeXWZcgRObP8JeC4EutIJEeitNV44c,330
- castor_extractor/visualization/thoughtspot/client/utils.py,sha256=3LgbIWoG1e39VW8rYaV4ot_0EFipziwf3rFAZKxrlEY,1072
- castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256=2XysRU7a58KA2JgNwU2j4GPrN0rkN7Gvk8kQCJlYXVk,2469
+ castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=XLDGs7v2e2S2VdJX8cQjMh80KNCHb_H5A9I8ejP1ZPs,342
+ castor_extractor/visualization/thoughtspot/client/pagination.py,sha256=iosYUJ7ZMT1G_Jm6AXPwczYnXFzS6Yez-B9-tRFiV_w,619
  castor_extractor/visualization/thoughtspot/extract.py,sha256=mcXS0jGFpa50td98AVbbTqxchyI5wDCpB-v1o5iRc3g,1354
  castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
@@ -380,16 +379,16 @@ castor_extractor/warehouse/snowflake/credentials.py,sha256=u0sZ6xPtcZmmvnUsAejJk
  castor_extractor/warehouse/snowflake/credentials_test.py,sha256=Lkc-DHXOvr50KrqAW4nt_x0IA0Mu_CsBVu6ATnzQB6I,673
  castor_extractor/warehouse/snowflake/extract.py,sha256=3yc9kcVtt2c1uWJOJJgeZchV4VmRr9EeYM3W6gl8zQQ,3201
  castor_extractor/warehouse/snowflake/queries/.sqlfluff,sha256=vttrwcr64JVIuvc7WIg9C54cbOkjg_VjXNR7YnTGOPE,31
- castor_extractor/warehouse/snowflake/queries/column.sql,sha256=Wy-arvS_3Dh0HFrzdpRmBsI58mMlN_5U097s5kMNluQ,1781
+ castor_extractor/warehouse/snowflake/queries/column.sql,sha256=Ru-yC0s76I9LehOA4aCZ--xz6D9H1Hyr3OZdILOBHAw,1882
  castor_extractor/warehouse/snowflake/queries/column_lineage.sql,sha256=YKBiZ6zySSNcXLDXwm31EjGIIkkkZc0-S6hI1SRM80o,1179
  castor_extractor/warehouse/snowflake/queries/database.sql,sha256=ifZXoKUXtsrGOxml6AcNhA4yybIyatH5va7bcp-lgCU,483
  castor_extractor/warehouse/snowflake/queries/function.sql,sha256=8LRh0ybhd-RldJ8UZspWUm3yv52evq11O2uqIO4KqeQ,372
  castor_extractor/warehouse/snowflake/queries/grant_to_role.sql,sha256=O7AJ1LzoXGDFmiVvQ8EMJ5x8FSAnaxRPdmRyAlEmkUM,272
  castor_extractor/warehouse/snowflake/queries/grant_to_user.sql,sha256=7AalVajU5vRRpIiys1igSwmDXirbwpMTvJr2ihSz2NE,143
- castor_extractor/warehouse/snowflake/queries/query.sql,sha256=-OYcWUvdPBkpOfezkZaW7hrOdDz3JyoqjNdRm_88Rsk,1779
+ castor_extractor/warehouse/snowflake/queries/query.sql,sha256=w4T6-TgwUozDgaF3Fk-qex7bDdEIHLkkB5XEe2VJXZQ,1992
  castor_extractor/warehouse/snowflake/queries/role.sql,sha256=D0VvGxLZMwug2SvefhAsNR9YIun0fZvcDWkz891xSYM,96
- castor_extractor/warehouse/snowflake/queries/schema.sql,sha256=HCDEw0Nj_GPHBNH3Ik_5BF4rkD5yBfSyeN9UaiFGrI4,730
- castor_extractor/warehouse/snowflake/queries/table.sql,sha256=qTwkAJ7-kM8vX03RP16U_5_euWW5ZTQAKuiLPsbj2hs,1438
+ castor_extractor/warehouse/snowflake/queries/schema.sql,sha256=iLn6_y5rn63KigjE4GEAMp8ZuZZofhMXYGb8saPDGUc,776
+ castor_extractor/warehouse/snowflake/queries/table.sql,sha256=CbSLfJAylyyyD3mkGPSLLE7BHrGjlY499kzO9RN0e4Y,1473
  castor_extractor/warehouse/snowflake/queries/user.sql,sha256=88V8eRj1NDaD_ufclsKOHHlqCtBMQHOV54yy6RKJaXk,570
  castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3N7BKkbXVM3RoIzqSDtohqRnE5kg4,673
  castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
@@ -404,8 +403,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
  castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
- castor_extractor-0.24.1.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
- castor_extractor-0.24.1.dist-info/METADATA,sha256=O-v3GkuQVqXbbr5DYt0Tm2uodPoXJd4f6ayR93ywA3M,22971
- castor_extractor-0.24.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
- castor_extractor-0.24.1.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
- castor_extractor-0.24.1.dist-info/RECORD,,
+ castor_extractor-0.24.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+ castor_extractor-0.24.4.dist-info/METADATA,sha256=eY2TPP3IDq9an2JJzoZcN-_rG5DJIGzbJOqEtGBhzd4,23543
+ castor_extractor-0.24.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ castor_extractor-0.24.4.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
+ castor_extractor-0.24.4.dist-info/RECORD,,

castor_extractor/visualization/tableau_revamp/__init__.py DELETED
@@ -1,3 +0,0 @@
- from .assets import TableauRevampAsset
- from .client import TableauRevampClient, TableauRevampCredentials
- from .extract import extract_all, iterate_all_data

castor_extractor/visualization/tableau_revamp/client/__init__.py DELETED
@@ -1,2 +0,0 @@
- from .client import TableauRevampClient
- from .credentials import TableauRevampCredentials

castor_extractor/visualization/thoughtspot/client/utils.py DELETED
@@ -1,31 +0,0 @@
- import csv
- import re
- from collections.abc import Iterator
- from io import StringIO
-
- _END_OF_GENERATED_TEXT = r'^""$'
-
-
- def usage_liveboard_reader(usage_liveboard_csv: str) -> Iterator[dict]:
-     """
-     Converts a CSV string into an iterator of dictionaries after
-     ignoring the generated text that preceeds the actual CSV header row.
-     The generated block ends with a row containing only two double quotes.
-     Here is an example:
-
-     "Data extract produced by Castor on 09/19/2024 06:54"
-     "Filters applied on data :"
-     "User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
-     "Pinboard NOT IN [mlm - availability pinboard,null]"
-     "Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
-     "Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
-     ""
-
-     """
-     csv_file = StringIO(usage_liveboard_csv)
-
-     line = next(csv_file)
-     while not re.match(_END_OF_GENERATED_TEXT, line.strip()):
-         line = next(csv_file)
-
-     yield from csv.DictReader(csv_file)

castor_extractor/visualization/thoughtspot/client/utils_test.py DELETED
@@ -1,75 +0,0 @@
- from .utils import (
-     usage_liveboard_reader,
- )
-
- VALID_CSV_1 = '''"Data extract produced by Castor on 09/19/2024 06:54"
- "Filters applied on data :"
- "User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
- "Pinboard NOT IN [mlm - availability pinboard,null]"
- "Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
- "Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
- ""
- "Pinboard","Pinboard Views","Unique Number of User"
- "Market Report","559","19"
- "Retailer report","204","14"
- "Second-hand market","72","6"
- "September test","25","2"'''
-
-
- VALID_CSV_2 = '''"Data extract produced by Castor on 01/07/2025 16:07"
- "Filters applied on data :"
- "Timestamp >= 20241208 00:00:00 < 20250107 00:00:00"
- ""
- "Answer name","User name","Number of unique users","Count of object interactions"
- "toto","tata","1","666"'''
-
- # Invalid CSV input (missing data rows)
- INVALID_CSV = '''"Data extract produced by Castor on 09/19/2024 06:54"
- "Filters applied on data :"
- "User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
- "Pinboard NOT IN [mlm - availability pinboard,null]"
- "Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
- "Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
- ""'''
-
-
- def test_usage_liveboard_reader():
-     expected_output_1 = [
-         {
-             "Pinboard": "Market Report",
-             "Pinboard Views": "559",
-             "Unique Number of User": "19",
-         },
-         {
-             "Pinboard": "Retailer report",
-             "Pinboard Views": "204",
-             "Unique Number of User": "14",
-         },
-         {
-             "Pinboard": "Second-hand market",
-             "Pinboard Views": "72",
-             "Unique Number of User": "6",
-         },
-         {
-             "Pinboard": "September test",
-             "Pinboard Views": "25",
-             "Unique Number of User": "2",
-         },
-     ]
-     expected_output_2 = [
-         {
-             "Answer name": "toto",
-             "User name": "tata",
-             "Number of unique users": "1",
-             "Count of object interactions": "666",
-         }
-     ]
-
-     result = list(usage_liveboard_reader(VALID_CSV_1))
-     assert result == expected_output_1
-
-     result = list(usage_liveboard_reader(VALID_CSV_2))
-     assert result == expected_output_2
-
-     result = list(usage_liveboard_reader(INVALID_CSV))
-     assert result == []  # Expect an empty result since there is no data