castor-extractor 0.24.2__py3-none-any.whl → 0.24.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +24 -0
- castor_extractor/utils/__init__.py +1 -0
- castor_extractor/utils/batch.py +16 -0
- castor_extractor/utils/batch_test.py +27 -0
- castor_extractor/visualization/domo/client/client.py +10 -4
- castor_extractor/visualization/tableau/client/client_metadata_api.py +23 -18
- castor_extractor/visualization/tableau/client/gql_queries.py +1 -1
- castor_extractor/visualization/thoughtspot/client/client.py +54 -17
- castor_extractor/visualization/thoughtspot/client/endpoints.py +2 -2
- castor_extractor/visualization/thoughtspot/client/pagination.py +25 -0
- castor_extractor/warehouse/snowflake/queries/column.sql +3 -1
- castor_extractor/warehouse/snowflake/queries/query.sql +19 -11
- castor_extractor/warehouse/snowflake/queries/schema.sql +1 -0
- castor_extractor/warehouse/snowflake/queries/table.sql +2 -2
- {castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/METADATA +27 -3
- {castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/RECORD +19 -18
- castor_extractor/visualization/thoughtspot/client/utils.py +0 -31
- castor_extractor/visualization/thoughtspot/client/utils_test.py +0 -75
- {castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,29 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.7 - 2025-04-07
|
|
4
|
+
|
|
5
|
+
* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
|
|
6
|
+
|
|
7
|
+
## 0.24.6 - 2025-04-03
|
|
8
|
+
|
|
9
|
+
* Domo - extract cards metadata in batches to avoid hitting the URL max length
|
|
10
|
+
|
|
11
|
+
## 0.24.5 - 2025-04-02
|
|
12
|
+
|
|
13
|
+
* bump dependencies: google-cloud-storage
|
|
14
|
+
|
|
15
|
+
## 0.24.4 - 2025-03-19
|
|
16
|
+
|
|
17
|
+
* Snowflake:
|
|
18
|
+
* improve the list of ignored queries in the query history extraction
|
|
19
|
+
* ignore the following query types : CALL, COMMENT, EXPLAIN, REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION, REVOKE, TRUNCATE_TABLE, UNDROP
|
|
20
|
+
* ignore queries with empty text
|
|
21
|
+
* filter out schemas with empty names
|
|
22
|
+
|
|
23
|
+
## 0.24.3 - 2025-03-18
|
|
24
|
+
|
|
25
|
+
* Replace ThoughtSpot endpoint `/api/rest/2.0/report/liveboard` with `/api/rest/2.0/metadata/liveboard/data` following the deprecation of the CSV option
|
|
26
|
+
|
|
3
27
|
## 0.24.2 - 2025-03-17
|
|
4
28
|
|
|
5
29
|
* Rename Revamped Tableau Connector classes
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from typing import Iterator, List, TypeVar
|
|
2
|
+
|
|
3
|
+
T = TypeVar("T")


def batch_of_length(
    elements: List[T],
    batch_size: int,
) -> Iterator[List[T]]:
    """
    Split the given elements into consecutive chunks of at most `batch_size`.

    Every chunk holds exactly `batch_size` items except possibly the last
    one, which holds the remainder. An empty input yields no chunk at all.

    Args:
        elements: the list to split; it is sliced, never mutated.
        batch_size: maximum number of items per chunk, must be >= 1.

    Yields:
        Successive slices of `elements`, in their original order.

    Raises:
        AssertionError: if `batch_size` is lower than 1. The check runs
            lazily, on the first iteration of the returned generator.
    """
    # Raised explicitly (instead of using an `assert` statement) so that the
    # validation still runs under `python -O`, while keeping the exception
    # type that existing callers and tests rely on.
    # A batch size of exactly 1 is valid, as the error message states.
    if batch_size < 1:
        raise AssertionError("batch size must be greater or equal to 1")

    # Slicing clamps the end bound to the list length, so the last
    # (possibly shorter) chunk needs no special handling.
    for start in range(0, len(elements), batch_size):
        yield elements[start : start + batch_size]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from .batch import batch_of_length
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_batch_of_length():
|
|
7
|
+
elements = ["a", "b", "c", "d", "e", "f", "g", "h"]
|
|
8
|
+
result = list(batch_of_length(elements, 3))
|
|
9
|
+
assert result == [
|
|
10
|
+
["a", "b", "c"],
|
|
11
|
+
["d", "e", "f"],
|
|
12
|
+
["g", "h"],
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
result = list(batch_of_length(elements, 1000))
|
|
16
|
+
assert result == [
|
|
17
|
+
elements,
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
result = list(batch_of_length(elements, 7))
|
|
21
|
+
assert result == [
|
|
22
|
+
["a", "b", "c", "d", "e", "f", "g"],
|
|
23
|
+
["h"],
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
with pytest.raises(AssertionError):
|
|
27
|
+
list(batch_of_length(elements, -12))
|
|
@@ -9,6 +9,7 @@ import requests
|
|
|
9
9
|
from ....utils import (
|
|
10
10
|
RequestSafeMode,
|
|
11
11
|
at_midnight,
|
|
12
|
+
batch_of_length,
|
|
12
13
|
current_date,
|
|
13
14
|
empty_iterator,
|
|
14
15
|
handle_response,
|
|
@@ -48,6 +49,8 @@ _RETRY_BASE_MS = 10 * 60 * 1000 # 10 minutes
|
|
|
48
49
|
|
|
49
50
|
_PARENT_FOLDER = "/Dashboards"
|
|
50
51
|
|
|
52
|
+
_CARDS_BATCH_SIZE = 100
|
|
53
|
+
|
|
51
54
|
logger = logging.getLogger(__name__)
|
|
52
55
|
|
|
53
56
|
|
|
@@ -156,16 +159,19 @@ class DomoClient:
|
|
|
156
159
|
|
|
157
160
|
return all_results
|
|
158
161
|
|
|
162
|
+
def _cards_metadata(self, card_ids: list[int]) -> Iterator[dict]:
|
|
163
|
+
# batch to avoid hitting the URL max length
|
|
164
|
+
for batch_card_ids in batch_of_length(card_ids, _CARDS_BATCH_SIZE):
|
|
165
|
+
endpoint = self._endpoint_factory.cards_metadata(batch_card_ids)
|
|
166
|
+
yield from self._get_element(endpoint)
|
|
167
|
+
|
|
159
168
|
def _datasources(self, card_ids: list[int]) -> RawData:
|
|
160
169
|
"""Yields all distinct datasources associated to the given cards"""
|
|
161
170
|
if not card_ids:
|
|
162
171
|
return empty_iterator()
|
|
163
172
|
|
|
164
|
-
endpoint = self._endpoint_factory.cards_metadata(card_ids)
|
|
165
|
-
cards_metadata = self._get_element(endpoint)
|
|
166
|
-
|
|
167
173
|
processed: set[str] = set()
|
|
168
|
-
for card in
|
|
174
|
+
for card in self._cards_metadata(card_ids):
|
|
169
175
|
for datasource in card["datasources"]:
|
|
170
176
|
id_ = datasource["dataSourceId"]
|
|
171
177
|
if id_ in processed:
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from collections.abc import Iterator
|
|
2
3
|
from typing import Optional
|
|
3
4
|
|
|
@@ -9,15 +10,14 @@ from ..constants import DEFAULT_PAGE_SIZE
|
|
|
9
10
|
from .errors import TableauApiError, TableauApiTimeout
|
|
10
11
|
from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
|
|
11
12
|
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
12
15
|
# increase the value when extraction is too slow
|
|
13
16
|
# decrease the value when timeouts arise
|
|
14
17
|
_CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
|
|
15
|
-
# for some clients, extraction of columns tend to hit the node limit
|
|
16
|
-
# https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
|
|
17
|
-
# the workaround is to reduce pagination
|
|
18
|
-
TableauAsset.COLUMN: 50,
|
|
19
18
|
# fields are light but volumes are bigger
|
|
20
19
|
TableauAsset.FIELD: 1000,
|
|
20
|
+
# tables are sometimes heavy
|
|
21
21
|
TableauAsset.TABLE: 50,
|
|
22
22
|
}
|
|
23
23
|
|
|
@@ -51,8 +51,9 @@ def _check_errors(answer: dict) -> None:
|
|
|
51
51
|
|
|
52
52
|
def gql_query_scroll(
|
|
53
53
|
server,
|
|
54
|
-
query: str,
|
|
55
54
|
resource: str,
|
|
55
|
+
fields: str,
|
|
56
|
+
page_size: int,
|
|
56
57
|
) -> Iterator[SerializedAsset]:
|
|
57
58
|
"""
|
|
58
59
|
Iterate over GQL query results, handling pagination and cursor
|
|
@@ -67,23 +68,27 @@ def gql_query_scroll(
|
|
|
67
68
|
max_retries=_RETRY_COUNT,
|
|
68
69
|
base_ms=_RETRY_BASE_MS,
|
|
69
70
|
)
|
|
70
|
-
def _call(
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
71
|
+
def _call(first: int, offset: int) -> dict:
|
|
72
|
+
query = QUERY_TEMPLATE.format(
|
|
73
|
+
resource=resource,
|
|
74
|
+
fields=fields,
|
|
75
|
+
first=first,
|
|
76
|
+
offset=offset,
|
|
77
|
+
)
|
|
78
|
+
answer = server.metadata.query(query)
|
|
75
79
|
_check_errors(answer)
|
|
76
80
|
return answer["data"][f"{resource}Connection"]
|
|
77
81
|
|
|
78
|
-
|
|
82
|
+
current_offset = 0
|
|
79
83
|
while True:
|
|
80
|
-
payload = _call(
|
|
84
|
+
payload = _call(first=page_size, offset=current_offset)
|
|
81
85
|
yield payload["nodes"]
|
|
82
86
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
+
current_offset += len(payload["nodes"])
|
|
88
|
+
total = payload["totalCount"]
|
|
89
|
+
logger.info(f"Extracted {current_offset}/{total} {resource}")
|
|
90
|
+
|
|
91
|
+
if not payload["pageInfo"]["hasNextPage"]:
|
|
87
92
|
break
|
|
88
93
|
|
|
89
94
|
|
|
@@ -107,12 +112,12 @@ class TableauClientMetadataApi:
|
|
|
107
112
|
fields: str,
|
|
108
113
|
page_size: int = DEFAULT_PAGE_SIZE,
|
|
109
114
|
) -> SerializedAsset:
|
|
110
|
-
|
|
115
|
+
result_pages = gql_query_scroll(
|
|
116
|
+
self._server,
|
|
111
117
|
resource=resource,
|
|
112
118
|
fields=fields,
|
|
113
119
|
page_size=page_size,
|
|
114
120
|
)
|
|
115
|
-
result_pages = gql_query_scroll(self._server, query, resource)
|
|
116
121
|
return [asset for page in result_pages for asset in page]
|
|
117
122
|
|
|
118
123
|
def _page_size(self, asset: TableauAsset) -> int:
|
|
@@ -1,13 +1,17 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from collections.abc import Iterator
|
|
2
|
-
from
|
|
3
|
+
from functools import partial
|
|
4
|
+
from typing import Iterable, Optional
|
|
3
5
|
|
|
4
6
|
import requests
|
|
7
|
+
from requests import Response
|
|
5
8
|
|
|
6
9
|
from ....utils import (
|
|
7
10
|
APIClient,
|
|
8
11
|
BearerAuth,
|
|
9
12
|
RequestSafeMode,
|
|
10
13
|
build_url,
|
|
14
|
+
fetch_all_pages,
|
|
11
15
|
handle_response,
|
|
12
16
|
)
|
|
13
17
|
from ..assets import (
|
|
@@ -19,9 +23,7 @@ from .credentials import (
|
|
|
19
23
|
from .endpoints import (
|
|
20
24
|
ThoughtspotEndpointFactory,
|
|
21
25
|
)
|
|
22
|
-
from .
|
|
23
|
-
usage_liveboard_reader,
|
|
24
|
-
)
|
|
26
|
+
from .pagination import METADATA_BATCH_SIZE, ThoughtSpotPagination
|
|
25
27
|
|
|
26
28
|
_AUTH_TIMEOUT_S = 60
|
|
27
29
|
_THOUGHTSPOT_HEADERS = {
|
|
@@ -29,7 +31,6 @@ _THOUGHTSPOT_HEADERS = {
|
|
|
29
31
|
"Accept": "application/json",
|
|
30
32
|
"Content-Type": "application/json",
|
|
31
33
|
}
|
|
32
|
-
_METADATA_BATCH_SIZE = 100
|
|
33
34
|
# https://docs.thoughtspot.com/cloud/latest/object-usage-liveboard
|
|
34
35
|
_OBJECT_USAGE_LIVEBOARD = "Object Usage"
|
|
35
36
|
_ANSWER_USAGE_VIZ = "Answer Usage, by User"
|
|
@@ -40,6 +41,9 @@ _LIVEBOARD_USAGE_VIZ = "Popular Liveboards Last 30 Days"
|
|
|
40
41
|
THOUGHTSPOT_SAFE_MODE = RequestSafeMode()
|
|
41
42
|
|
|
42
43
|
|
|
44
|
+
logger = logging.getLogger(__name__)
|
|
45
|
+
|
|
46
|
+
|
|
43
47
|
class ThoughtspotBearerAuth(BearerAuth):
|
|
44
48
|
def __init__(self, host: str, token_payload: dict[str, str]):
|
|
45
49
|
auth_endpoint = ThoughtspotEndpointFactory.authentication()
|
|
@@ -86,7 +90,7 @@ class ThoughtspotClient(APIClient):
|
|
|
86
90
|
search_filters = {
|
|
87
91
|
"metadata": [{"type": metadata_type}],
|
|
88
92
|
"include_details": True,
|
|
89
|
-
"record_size":
|
|
93
|
+
"record_size": METADATA_BATCH_SIZE,
|
|
90
94
|
"record_offset": offset,
|
|
91
95
|
}
|
|
92
96
|
if identifier:
|
|
@@ -100,9 +104,9 @@ class ThoughtspotClient(APIClient):
|
|
|
100
104
|
data=search_filters,
|
|
101
105
|
)
|
|
102
106
|
yield from metadata
|
|
103
|
-
if len(metadata) <
|
|
107
|
+
if len(metadata) < METADATA_BATCH_SIZE:
|
|
104
108
|
break
|
|
105
|
-
offset = offset +
|
|
109
|
+
offset = offset + METADATA_BATCH_SIZE
|
|
106
110
|
|
|
107
111
|
def _get_all_answers(self) -> Iterator[dict]:
|
|
108
112
|
yield from self._metadata_search(metadata_type="ANSWER")
|
|
@@ -120,7 +124,7 @@ class ThoughtspotClient(APIClient):
|
|
|
120
124
|
self,
|
|
121
125
|
liveboard_name: str,
|
|
122
126
|
visualization_name: str,
|
|
123
|
-
) -> Iterator[
|
|
127
|
+
) -> Iterator[list[list]]:
|
|
124
128
|
"""
|
|
125
129
|
Yields the data of a given visualization in the given liveboard.
|
|
126
130
|
ThoughtSpot maintains two system liveboards with stats about data usage,
|
|
@@ -133,29 +137,62 @@ class ThoughtspotClient(APIClient):
|
|
|
133
137
|
)
|
|
134
138
|
liveboard_id = usage_liveboard["metadata_id"]
|
|
135
139
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
140
|
+
def handler(response: Response) -> dict:
|
|
141
|
+
response_dict = response.json()
|
|
142
|
+
contents = response_dict.get("contents", [])
|
|
143
|
+
if not contents:
|
|
144
|
+
logger.warning("No data found in response")
|
|
145
|
+
return dict()
|
|
146
|
+
return contents[0]
|
|
147
|
+
|
|
148
|
+
request = partial(
|
|
149
|
+
self._post,
|
|
150
|
+
endpoint=ThoughtspotEndpointFactory.liveboard_data(),
|
|
139
151
|
data={
|
|
140
152
|
"metadata_identifier": liveboard_id,
|
|
141
|
-
"file_format": "CSV",
|
|
142
153
|
"visualization_identifiers": [visualization_name],
|
|
154
|
+
"record_offset": 0,
|
|
155
|
+
"record_size": METADATA_BATCH_SIZE,
|
|
143
156
|
},
|
|
144
|
-
handler=
|
|
157
|
+
handler=handler,
|
|
145
158
|
)
|
|
146
|
-
yield from
|
|
159
|
+
yield from fetch_all_pages(request, ThoughtSpotPagination)
|
|
147
160
|
|
|
148
161
|
def _get_answer_usages(self) -> Iterator[dict]:
|
|
149
|
-
|
|
162
|
+
"""
|
|
163
|
+
Returns the usage data of saved Answers, which is found in a visualization
|
|
164
|
+
of the "Object Usage" liveboard.
|
|
165
|
+
Each data row returned by the API is transformed from a list into a dictionary.
|
|
166
|
+
The columns are explicitly listed here because in the API response,
|
|
167
|
+
there is a mismatch between the number of column names and the number
|
|
168
|
+
of values per data row.
|
|
169
|
+
"""
|
|
170
|
+
data: Iterable[list[list]] = self._get_usages(
|
|
150
171
|
liveboard_name=_OBJECT_USAGE_LIVEBOARD,
|
|
151
172
|
visualization_name=_ANSWER_USAGE_VIZ,
|
|
152
173
|
)
|
|
174
|
+
columns = (
|
|
175
|
+
"Answer name",
|
|
176
|
+
"Number of unique users",
|
|
177
|
+
"Count of object interactions",
|
|
178
|
+
)
|
|
179
|
+
for row in data:
|
|
180
|
+
yield dict(zip(columns, row))
|
|
153
181
|
|
|
154
182
|
def _get_liveboards_usages(self) -> Iterator[dict]:
|
|
155
|
-
|
|
183
|
+
"""
|
|
184
|
+
Returns the usage data of Liveboards, which is found in a visualization
|
|
185
|
+
of the "User Adoption" liveboard.
|
|
186
|
+
Each data row returned by the API is transformed from a list into a dictionary.
|
|
187
|
+
See `_get_answer_usages` regarding the columns list.
|
|
188
|
+
"""
|
|
189
|
+
data: Iterable[list[list]] = self._get_usages(
|
|
156
190
|
liveboard_name=_USER_ADOPTION_LIVEBOARD,
|
|
157
191
|
visualization_name=_LIVEBOARD_USAGE_VIZ,
|
|
158
192
|
)
|
|
193
|
+
columns = ("Pinboard", "Unique Number of User", "Pinboard Views")
|
|
194
|
+
for row in data:
|
|
195
|
+
yield dict(zip(columns, row))
|
|
159
196
|
|
|
160
197
|
def fetch(self, asset: ThoughtspotAsset) -> Iterator[dict]:
|
|
161
198
|
if asset == ThoughtspotAsset.ANSWERS:
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from pydantic import ConfigDict, Field
|
|
2
|
+
|
|
3
|
+
from ....utils import PaginationModel
|
|
4
|
+
|
|
5
|
+
METADATA_BATCH_SIZE = 100
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ThoughtSpotPagination(PaginationModel):
|
|
9
|
+
data_rows: list = Field(default_factory=list)
|
|
10
|
+
record_offset: int
|
|
11
|
+
record_size: int
|
|
12
|
+
|
|
13
|
+
model_config = ConfigDict(
|
|
14
|
+
populate_by_name=True,
|
|
15
|
+
from_attributes=True,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
def is_last(self) -> bool:
|
|
19
|
+
return len(self.data_rows) < METADATA_BATCH_SIZE
|
|
20
|
+
|
|
21
|
+
def next_page_payload(self) -> dict:
|
|
22
|
+
return {"record_offset": self.record_offset + METADATA_BATCH_SIZE}
|
|
23
|
+
|
|
24
|
+
def page_results(self) -> list:
|
|
25
|
+
return self.data_rows
|
|
@@ -47,7 +47,9 @@ FROM snowflake.account_usage.columns AS c
|
|
|
47
47
|
JOIN snowflake.account_usage.tables AS t ON t.table_id = c.table_id
|
|
48
48
|
JOIN tags_agg_columns ta ON c.column_id = ta.column_id
|
|
49
49
|
WHERE TRUE
|
|
50
|
-
AND COALESCE(c.column_name, '') != ''
|
|
50
|
+
AND TRIM(COALESCE(c.column_name, '')) != ''
|
|
51
|
+
AND TRIM(COALESCE(t.table_name, '')) != ''
|
|
52
|
+
AND TRIM(COALESCE(s.schema_name, '')) != ''
|
|
51
53
|
AND UPPER(c.table_catalog) NOT IN ('SNOWFLAKE', 'UTIL_DB')
|
|
52
54
|
AND (
|
|
53
55
|
c.deleted IS NULL
|
|
@@ -51,20 +51,28 @@ WHERE TRUE
|
|
|
51
51
|
AND HOUR(CONVERT_TIMEZONE('UTC', start_time)) BETWEEN :hour_min AND :hour_max
|
|
52
52
|
AND execution_status = 'SUCCESS'
|
|
53
53
|
AND query_text != 'SELECT 1'
|
|
54
|
+
AND TRIM(COALESCE(query_text, '')) != ''
|
|
54
55
|
AND query_type NOT IN (
|
|
55
|
-
'SHOW',
|
|
56
|
-
'USE',
|
|
57
|
-
'ROLLBACK',
|
|
58
|
-
'DESCRIBE',
|
|
59
56
|
'ALTER_SESSION',
|
|
60
|
-
'
|
|
57
|
+
'BEGIN_TRANSACTION',
|
|
58
|
+
'CALL',
|
|
59
|
+
'COMMENT',
|
|
60
|
+
'COMMIT',
|
|
61
61
|
'CREATE', -- create objects: stage|function|schema|procedure|file|storage|pipe|notification integration
|
|
62
|
-
'
|
|
62
|
+
'DESCRIBE',
|
|
63
|
+
'DROP',
|
|
64
|
+
'EXPLAIN',
|
|
65
|
+
'GET_FILES',
|
|
63
66
|
'GRANT',
|
|
64
|
-
'
|
|
67
|
+
'PUT_FILES',
|
|
68
|
+
'REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION',
|
|
69
|
+
'REMOVE_FILES',
|
|
70
|
+
'REVOKE',
|
|
71
|
+
'ROLLBACK',
|
|
72
|
+
'SET',
|
|
73
|
+
'SHOW',
|
|
74
|
+
'TRUNCATE_TABLE',
|
|
75
|
+
'UNDROP',
|
|
65
76
|
'UNLOAD',
|
|
66
|
-
'
|
|
67
|
-
'DROP',
|
|
68
|
-
'BEGIN_TRANSACTION',
|
|
69
|
-
'REMOVE_FILES'
|
|
77
|
+
'USE'
|
|
70
78
|
)
|
|
@@ -16,6 +16,7 @@ WHERE TRUE
|
|
|
16
16
|
deleted IS NULL
|
|
17
17
|
OR deleted > CURRENT_TIMESTAMP - INTERVAL '1 day'
|
|
18
18
|
)
|
|
19
|
+
AND TRIM(COALESCE(schema_name, '')) != ''
|
|
19
20
|
{database_allowed}
|
|
20
21
|
{database_blocked}
|
|
21
22
|
AND CASE {has_fetch_transient} WHEN FALSE THEN NOT s.is_transient::BOOLEAN ELSE TRUE END
|
|
@@ -41,8 +41,8 @@ FROM snowflake.account_usage.tables AS t
|
|
|
41
41
|
JOIN snowflake.account_usage.schemata AS s ON s.schema_id = t.table_schema_id
|
|
42
42
|
JOIN tags_agg_tables ta ON t.table_id = ta.table_id
|
|
43
43
|
WHERE TRUE
|
|
44
|
-
AND t.table_name
|
|
45
|
-
AND
|
|
44
|
+
AND TRIM(COALESCE(t.table_name, '')) != ''
|
|
45
|
+
AND TRIM(COALESCE(s.schema_name, '')) != ''
|
|
46
46
|
AND UPPER(t.table_catalog) NOT IN ('SNOWFLAKE', 'UTIL_DB')
|
|
47
47
|
AND (
|
|
48
48
|
t.deleted IS NULL
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.
|
|
3
|
+
Version: 0.24.7
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -35,7 +35,7 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
|
|
|
35
35
|
Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
|
|
36
36
|
Requires-Dist: google-auth (>=2,<3)
|
|
37
37
|
Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
|
|
38
|
-
Requires-Dist: google-cloud-storage (>=
|
|
38
|
+
Requires-Dist: google-cloud-storage (>=3.1.0,<4.0.0)
|
|
39
39
|
Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
|
|
40
40
|
Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
|
|
41
41
|
Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
|
|
@@ -51,7 +51,7 @@ Requires-Dist: pymssql (>=2.2.11,<3.0.0) ; extra == "sqlserver" or extra == "all
|
|
|
51
51
|
Requires-Dist: pymysql[rsa] (>=1.1.0,<2.0.0) ; extra == "mysql" or extra == "all"
|
|
52
52
|
Requires-Dist: python-dateutil (>=2.0.0,<=3.0.0)
|
|
53
53
|
Requires-Dist: requests (>=2.0.0,<3.0.0)
|
|
54
|
-
Requires-Dist: setuptools (>=
|
|
54
|
+
Requires-Dist: setuptools (>=78.1)
|
|
55
55
|
Requires-Dist: snowflake-connector-python (>=3.4.0,<4.0.0) ; extra == "snowflake" or extra == "all"
|
|
56
56
|
Requires-Dist: snowflake-sqlalchemy (!=1.2.5,<2.0.0) ; extra == "snowflake" or extra == "all"
|
|
57
57
|
Requires-Dist: sqlalchemy (>=1.4,<1.5)
|
|
@@ -210,6 +210,30 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
|
|
|
210
210
|
|
|
211
211
|
# Changelog
|
|
212
212
|
|
|
213
|
+
## 0.24.7 - 2025-04-07
|
|
214
|
+
|
|
215
|
+
* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
|
|
216
|
+
|
|
217
|
+
## 0.24.6 - 2025-04-03
|
|
218
|
+
|
|
219
|
+
* Domo - extract cards metadata in batches to avoid hitting the URL max length
|
|
220
|
+
|
|
221
|
+
## 0.24.5 - 2025-04-02
|
|
222
|
+
|
|
223
|
+
* bump dependencies: google-cloud-storage
|
|
224
|
+
|
|
225
|
+
## 0.24.4 - 2025-03-19
|
|
226
|
+
|
|
227
|
+
* Snowflake:
|
|
228
|
+
* improve the list of ignored queries in the query history extraction
|
|
229
|
+
* ignore the following query types : CALL, COMMENT, EXPLAIN, REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION, REVOKE, TRUNCATE_TABLE, UNDROP
|
|
230
|
+
* ignore queries with empty text
|
|
231
|
+
* filter out schemas with empty names
|
|
232
|
+
|
|
233
|
+
## 0.24.3 - 2025-03-18
|
|
234
|
+
|
|
235
|
+
* Replace ThoughtSpot endpoint `/api/rest/2.0/report/liveboard` with `/api/rest/2.0/metadata/liveboard/data` following the deprecation of the CSV option
|
|
236
|
+
|
|
213
237
|
## 0.24.2 - 2025-03-17
|
|
214
238
|
|
|
215
239
|
* Rename Revamped Tableau Connector classes
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=UWuENqrKnLu244f4Of6dtZ59XZ7jrLWkcQni3MqXPBg,16667
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -77,9 +77,11 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
|
|
|
77
77
|
castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
|
|
78
78
|
castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
|
|
79
79
|
castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
|
|
80
|
-
castor_extractor/utils/__init__.py,sha256=
|
|
80
|
+
castor_extractor/utils/__init__.py,sha256=KQkr_CmxWG0Vpu7CaqjbJkffUeEWcyeA9Cbm394Hygk,1585
|
|
81
81
|
castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
|
|
82
82
|
castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
|
|
83
|
+
castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
|
|
84
|
+
castor_extractor/utils/batch_test.py,sha256=84JYXOxiTkZFAceVh0mzN6VtKxcqoFPbxkZfIDyLGlg,606
|
|
83
85
|
castor_extractor/utils/client/__init__.py,sha256=h5gm8UNNCCkAqhjYK5f6BY7k0cHFOyAvkmlktqwpir0,392
|
|
84
86
|
castor_extractor/utils/client/abstract.py,sha256=CWF7_afNpEZ3jor-22wXbKIvM20ukHkaDy_uknKz8B0,2075
|
|
85
87
|
castor_extractor/utils/client/api/__init__.py,sha256=vlG7WXznYgLTn3XyMGsyUkgRkup8FbKM14EXJ8mv-b0,264
|
|
@@ -146,7 +148,7 @@ castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
146
148
|
castor_extractor/visualization/domo/__init__.py,sha256=1axOCPm4RpdIyUt9LQEvlMvbOPllW8rk63h6EjVgJ0Y,111
|
|
147
149
|
castor_extractor/visualization/domo/assets.py,sha256=bK1urFR2tnlWkVkkhR32mAKMoKbESNlop-CNGx-65PY,206
|
|
148
150
|
castor_extractor/visualization/domo/client/__init__.py,sha256=Do0fU4B8Hhlhahcv734gnJl_ryCztfTBDea7XNCKfB8,72
|
|
149
|
-
castor_extractor/visualization/domo/client/client.py,sha256=
|
|
151
|
+
castor_extractor/visualization/domo/client/client.py,sha256=bgzXWUm-UnTIwgyJKaJkoHzQpDYwWCGCe97MsMFw6ng,9930
|
|
150
152
|
castor_extractor/visualization/domo/client/credentials.py,sha256=4gnsk4Tpt3ggdUYbvyNPJEXeCyTy12s-X24P5hFdULg,873
|
|
151
153
|
castor_extractor/visualization/domo/client/endpoints.py,sha256=eIE9oeZ_cmJSWWDuyxh6JaAOs3y5bTJQQ265HYgpulE,2775
|
|
152
154
|
castor_extractor/visualization/domo/client/pagination.py,sha256=ukVkHVzoH4mfZ29H9YcnC2YrdVolP10wv25J6Q3ehRw,821
|
|
@@ -264,23 +266,22 @@ castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwy
|
|
|
264
266
|
castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
|
|
265
267
|
castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
|
|
266
268
|
castor_extractor/visualization/tableau/client/client.py,sha256=zzqhzIqKyJygo4ZNGk6cZh0e6Z9R1W5T0P9un52KC1M,7626
|
|
267
|
-
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=
|
|
269
|
+
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=VHNV1Q0EVKuiFKm1yKSx4tIuPGww4Mlw3yui2DgKe7I,4196
|
|
268
270
|
castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
|
|
269
271
|
castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
|
|
270
272
|
castor_extractor/visualization/tableau/client/credentials.py,sha256=uQICIgeXmLZfOroTgZt7PuKNKTyqQllRGSTcOmIfrKU,1893
|
|
271
273
|
castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9ykblA0nvd75j5-_QDFupjV48zJQ,300
|
|
272
|
-
castor_extractor/visualization/tableau/client/gql_queries.py,sha256=
|
|
274
|
+
castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
|
|
273
275
|
castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
|
|
274
276
|
castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
|
|
275
277
|
castor_extractor/visualization/tableau/extract.py,sha256=FnjmmUdNA9MEf3S5Tw37x6ZXxVsK8R3YnVk1UVYbaZk,1423
|
|
276
278
|
castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
|
|
277
279
|
castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
|
|
278
280
|
castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
|
|
279
|
-
castor_extractor/visualization/thoughtspot/client/client.py,sha256=
|
|
281
|
+
castor_extractor/visualization/thoughtspot/client/client.py,sha256=lRNkigPV2MTozgBzFkij7mCXMMRqXzPtNs8EEi_f3tk,7127
|
|
280
282
|
castor_extractor/visualization/thoughtspot/client/credentials.py,sha256=fp4YHiZy-dstWiLr5c4kFU9SyPK5rd2nCeh8k5sVRpM,462
|
|
281
|
-
castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=
|
|
282
|
-
castor_extractor/visualization/thoughtspot/client/
|
|
283
|
-
castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256=2XysRU7a58KA2JgNwU2j4GPrN0rkN7Gvk8kQCJlYXVk,2469
|
|
283
|
+
castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=XLDGs7v2e2S2VdJX8cQjMh80KNCHb_H5A9I8ejP1ZPs,342
|
|
284
|
+
castor_extractor/visualization/thoughtspot/client/pagination.py,sha256=iosYUJ7ZMT1G_Jm6AXPwczYnXFzS6Yez-B9-tRFiV_w,619
|
|
284
285
|
castor_extractor/visualization/thoughtspot/extract.py,sha256=mcXS0jGFpa50td98AVbbTqxchyI5wDCpB-v1o5iRc3g,1354
|
|
285
286
|
castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
286
287
|
castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
|
|
@@ -380,16 +381,16 @@ castor_extractor/warehouse/snowflake/credentials.py,sha256=u0sZ6xPtcZmmvnUsAejJk
|
|
|
380
381
|
castor_extractor/warehouse/snowflake/credentials_test.py,sha256=Lkc-DHXOvr50KrqAW4nt_x0IA0Mu_CsBVu6ATnzQB6I,673
|
|
381
382
|
castor_extractor/warehouse/snowflake/extract.py,sha256=3yc9kcVtt2c1uWJOJJgeZchV4VmRr9EeYM3W6gl8zQQ,3201
|
|
382
383
|
castor_extractor/warehouse/snowflake/queries/.sqlfluff,sha256=vttrwcr64JVIuvc7WIg9C54cbOkjg_VjXNR7YnTGOPE,31
|
|
383
|
-
castor_extractor/warehouse/snowflake/queries/column.sql,sha256=
|
|
384
|
+
castor_extractor/warehouse/snowflake/queries/column.sql,sha256=Ru-yC0s76I9LehOA4aCZ--xz6D9H1Hyr3OZdILOBHAw,1882
|
|
384
385
|
castor_extractor/warehouse/snowflake/queries/column_lineage.sql,sha256=YKBiZ6zySSNcXLDXwm31EjGIIkkkZc0-S6hI1SRM80o,1179
|
|
385
386
|
castor_extractor/warehouse/snowflake/queries/database.sql,sha256=ifZXoKUXtsrGOxml6AcNhA4yybIyatH5va7bcp-lgCU,483
|
|
386
387
|
castor_extractor/warehouse/snowflake/queries/function.sql,sha256=8LRh0ybhd-RldJ8UZspWUm3yv52evq11O2uqIO4KqeQ,372
|
|
387
388
|
castor_extractor/warehouse/snowflake/queries/grant_to_role.sql,sha256=O7AJ1LzoXGDFmiVvQ8EMJ5x8FSAnaxRPdmRyAlEmkUM,272
|
|
388
389
|
castor_extractor/warehouse/snowflake/queries/grant_to_user.sql,sha256=7AalVajU5vRRpIiys1igSwmDXirbwpMTvJr2ihSz2NE,143
|
|
389
|
-
castor_extractor/warehouse/snowflake/queries/query.sql,sha256
|
|
390
|
+
castor_extractor/warehouse/snowflake/queries/query.sql,sha256=w4T6-TgwUozDgaF3Fk-qex7bDdEIHLkkB5XEe2VJXZQ,1992
|
|
390
391
|
castor_extractor/warehouse/snowflake/queries/role.sql,sha256=D0VvGxLZMwug2SvefhAsNR9YIun0fZvcDWkz891xSYM,96
|
|
391
|
-
castor_extractor/warehouse/snowflake/queries/schema.sql,sha256=
|
|
392
|
-
castor_extractor/warehouse/snowflake/queries/table.sql,sha256=
|
|
392
|
+
castor_extractor/warehouse/snowflake/queries/schema.sql,sha256=iLn6_y5rn63KigjE4GEAMp8ZuZZofhMXYGb8saPDGUc,776
|
|
393
|
+
castor_extractor/warehouse/snowflake/queries/table.sql,sha256=CbSLfJAylyyyD3mkGPSLLE7BHrGjlY499kzO9RN0e4Y,1473
|
|
393
394
|
castor_extractor/warehouse/snowflake/queries/user.sql,sha256=88V8eRj1NDaD_ufclsKOHHlqCtBMQHOV54yy6RKJaXk,570
|
|
394
395
|
castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3N7BKkbXVM3RoIzqSDtohqRnE5kg4,673
|
|
395
396
|
castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
|
|
@@ -404,8 +405,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
404
405
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
405
406
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
406
407
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
407
|
-
castor_extractor-0.24.
|
|
408
|
-
castor_extractor-0.24.
|
|
409
|
-
castor_extractor-0.24.
|
|
410
|
-
castor_extractor-0.24.
|
|
411
|
-
castor_extractor-0.24.
|
|
408
|
+
castor_extractor-0.24.7.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
409
|
+
castor_extractor-0.24.7.dist-info/METADATA,sha256=qWp3OBv1FO123RJqz2YKTEd12WzhKoDmcxVZLhvzn6M,23831
|
|
410
|
+
castor_extractor-0.24.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
411
|
+
castor_extractor-0.24.7.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
|
|
412
|
+
castor_extractor-0.24.7.dist-info/RECORD,,
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
import csv
|
|
2
|
-
import re
|
|
3
|
-
from collections.abc import Iterator
|
|
4
|
-
from io import StringIO
|
|
5
|
-
|
|
6
|
-
_END_OF_GENERATED_TEXT = r'^""$'
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def usage_liveboard_reader(usage_liveboard_csv: str) -> Iterator[dict]:
|
|
10
|
-
"""
|
|
11
|
-
Converts a CSV string into an iterator of dictionaries after
|
|
12
|
-
ignoring the generated text that preceeds the actual CSV header row.
|
|
13
|
-
The generated block ends with a row containing only two double quotes.
|
|
14
|
-
Here is an example:
|
|
15
|
-
|
|
16
|
-
"Data extract produced by Castor on 09/19/2024 06:54"
|
|
17
|
-
"Filters applied on data :"
|
|
18
|
-
"User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
|
|
19
|
-
"Pinboard NOT IN [mlm - availability pinboard,null]"
|
|
20
|
-
"Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
|
|
21
|
-
"Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
|
|
22
|
-
""
|
|
23
|
-
|
|
24
|
-
"""
|
|
25
|
-
csv_file = StringIO(usage_liveboard_csv)
|
|
26
|
-
|
|
27
|
-
line = next(csv_file)
|
|
28
|
-
while not re.match(_END_OF_GENERATED_TEXT, line.strip()):
|
|
29
|
-
line = next(csv_file)
|
|
30
|
-
|
|
31
|
-
yield from csv.DictReader(csv_file)
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
from .utils import (
|
|
2
|
-
usage_liveboard_reader,
|
|
3
|
-
)
|
|
4
|
-
|
|
5
|
-
VALID_CSV_1 = '''"Data extract produced by Castor on 09/19/2024 06:54"
|
|
6
|
-
"Filters applied on data :"
|
|
7
|
-
"User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
|
|
8
|
-
"Pinboard NOT IN [mlm - availability pinboard,null]"
|
|
9
|
-
"Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
|
|
10
|
-
"Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
|
|
11
|
-
""
|
|
12
|
-
"Pinboard","Pinboard Views","Unique Number of User"
|
|
13
|
-
"Market Report","559","19"
|
|
14
|
-
"Retailer report","204","14"
|
|
15
|
-
"Second-hand market","72","6"
|
|
16
|
-
"September test","25","2"'''
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
VALID_CSV_2 = '''"Data extract produced by Castor on 01/07/2025 16:07"
|
|
20
|
-
"Filters applied on data :"
|
|
21
|
-
"Timestamp >= 20241208 00:00:00 < 20250107 00:00:00"
|
|
22
|
-
""
|
|
23
|
-
"Answer name","User name","Number of unique users","Count of object interactions"
|
|
24
|
-
"toto","tata","1","666"'''
|
|
25
|
-
|
|
26
|
-
# Invalid CSV input (missing data rows)
|
|
27
|
-
INVALID_CSV = '''"Data extract produced by Castor on 09/19/2024 06:54"
|
|
28
|
-
"Filters applied on data :"
|
|
29
|
-
"User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
|
|
30
|
-
"Pinboard NOT IN [mlm - availability pinboard,null]"
|
|
31
|
-
"Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
|
|
32
|
-
"Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
|
|
33
|
-
""'''
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def test_usage_liveboard_reader():
|
|
37
|
-
expected_output_1 = [
|
|
38
|
-
{
|
|
39
|
-
"Pinboard": "Market Report",
|
|
40
|
-
"Pinboard Views": "559",
|
|
41
|
-
"Unique Number of User": "19",
|
|
42
|
-
},
|
|
43
|
-
{
|
|
44
|
-
"Pinboard": "Retailer report",
|
|
45
|
-
"Pinboard Views": "204",
|
|
46
|
-
"Unique Number of User": "14",
|
|
47
|
-
},
|
|
48
|
-
{
|
|
49
|
-
"Pinboard": "Second-hand market",
|
|
50
|
-
"Pinboard Views": "72",
|
|
51
|
-
"Unique Number of User": "6",
|
|
52
|
-
},
|
|
53
|
-
{
|
|
54
|
-
"Pinboard": "September test",
|
|
55
|
-
"Pinboard Views": "25",
|
|
56
|
-
"Unique Number of User": "2",
|
|
57
|
-
},
|
|
58
|
-
]
|
|
59
|
-
expected_output_2 = [
|
|
60
|
-
{
|
|
61
|
-
"Answer name": "toto",
|
|
62
|
-
"User name": "tata",
|
|
63
|
-
"Number of unique users": "1",
|
|
64
|
-
"Count of object interactions": "666",
|
|
65
|
-
}
|
|
66
|
-
]
|
|
67
|
-
|
|
68
|
-
result = list(usage_liveboard_reader(VALID_CSV_1))
|
|
69
|
-
assert result == expected_output_1
|
|
70
|
-
|
|
71
|
-
result = list(usage_liveboard_reader(VALID_CSV_2))
|
|
72
|
-
assert result == expected_output_2
|
|
73
|
-
|
|
74
|
-
result = list(usage_liveboard_reader(INVALID_CSV))
|
|
75
|
-
assert result == [] # Expect an empty result since there is no data
|
|
File without changes
|
|
File without changes
|
|
File without changes
|