castor-extractor 0.24.2 → 0.24.4 (py3-none-any.whl)

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the registry.


CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
 # Changelog
 
+## 0.24.4 - 2025-03-19
+
+* Snowflake:
+  * improve the list of ignored queries in the query history extraction
+    * ignore the following query types : CALL, COMMENT, EXPLAIN, REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION, REVOKE, TRUNCATE_TABLE, UNDROP
+    * ignore queries with empty text
+  * filter out schemas with empty names
+
+## 0.24.3 - 2025-03-18
+
+* Replace ThoughtSpot endpoint `/api/rest/2.0/report/liveboard` with `/api/rest/2.0/metadata/liveboard/data` following the deprecation of the CSV option
+
 ## 0.24.2 - 2025-03-17
 
 * Rename Revamped Tableau Connector classes

castor_extractor/visualization/thoughtspot/client/client.py CHANGED
@@ -1,13 +1,17 @@
+import logging
 from collections.abc import Iterator
-from typing import Optional
+from functools import partial
+from typing import Iterable, Optional
 
 import requests
+from requests import Response
 
 from ....utils import (
     APIClient,
     BearerAuth,
     RequestSafeMode,
     build_url,
+    fetch_all_pages,
     handle_response,
 )
 from ..assets import (
@@ -19,9 +23,7 @@ from .credentials import (
 from .endpoints import (
     ThoughtspotEndpointFactory,
 )
-from .utils import (
-    usage_liveboard_reader,
-)
+from .pagination import METADATA_BATCH_SIZE, ThoughtSpotPagination
 
 _AUTH_TIMEOUT_S = 60
 _THOUGHTSPOT_HEADERS = {
@@ -29,7 +31,6 @@ _THOUGHTSPOT_HEADERS = {
     "Accept": "application/json",
     "Content-Type": "application/json",
 }
-_METADATA_BATCH_SIZE = 100
 # https://docs.thoughtspot.com/cloud/latest/object-usage-liveboard
 _OBJECT_USAGE_LIVEBOARD = "Object Usage"
 _ANSWER_USAGE_VIZ = "Answer Usage, by User"
@@ -40,6 +41,9 @@ _LIVEBOARD_USAGE_VIZ = "Popular Liveboards Last 30 Days"
 THOUGHTSPOT_SAFE_MODE = RequestSafeMode()
 
 
+logger = logging.getLogger(__name__)
+
+
 class ThoughtspotBearerAuth(BearerAuth):
     def __init__(self, host: str, token_payload: dict[str, str]):
         auth_endpoint = ThoughtspotEndpointFactory.authentication()
@@ -86,7 +90,7 @@ class ThoughtspotClient(APIClient):
         search_filters = {
             "metadata": [{"type": metadata_type}],
             "include_details": True,
-            "record_size": _METADATA_BATCH_SIZE,
+            "record_size": METADATA_BATCH_SIZE,
             "record_offset": offset,
         }
         if identifier:
@@ -100,9 +104,9 @@ class ThoughtspotClient(APIClient):
                 data=search_filters,
             )
             yield from metadata
-            if len(metadata) < _METADATA_BATCH_SIZE:
+            if len(metadata) < METADATA_BATCH_SIZE:
                 break
-            offset = offset + _METADATA_BATCH_SIZE
+            offset = offset + METADATA_BATCH_SIZE
 
     def _get_all_answers(self) -> Iterator[dict]:
         yield from self._metadata_search(metadata_type="ANSWER")
@@ -120,7 +124,7 @@ class ThoughtspotClient(APIClient):
         self,
         liveboard_name: str,
         visualization_name: str,
-    ) -> Iterator[dict]:
+    ) -> Iterator[list[list]]:
         """
         Yields the data of a given visualization in the given liveboard.
         ThoughtSpot maintains two system liveboards with stats about data usage,
@@ -133,29 +137,62 @@
         )
         liveboard_id = usage_liveboard["metadata_id"]
 
-        data = self._post(
-            endpoint=ThoughtspotEndpointFactory.liveboard(),
-            headers={"Accept": "application/octet-stream"},
+        def handler(response: Response) -> dict:
+            response_dict = response.json()
+            contents = response_dict.get("contents", [])
+            if not contents:
+                logger.warning("No data found in response")
+                return dict()
+            return contents[0]
+
+        request = partial(
+            self._post,
+            endpoint=ThoughtspotEndpointFactory.liveboard_data(),
             data={
                 "metadata_identifier": liveboard_id,
-                "file_format": "CSV",
                 "visualization_identifiers": [visualization_name],
+                "record_offset": 0,
+                "record_size": METADATA_BATCH_SIZE,
             },
-            handler=lambda x: x.text,
+            handler=handler,
         )
-        yield from usage_liveboard_reader(data)
+        yield from fetch_all_pages(request, ThoughtSpotPagination)
 
     def _get_answer_usages(self) -> Iterator[dict]:
-        return self._get_usages(
+        """
+        Returns the usage data of saved Answers, which is found in a visualization
+        of the "Object Usage" liveboard.
+        Each data row returned by the API is transformed from a list into a dictionary.
+        The columns are explicitly listed here because in the API response,
+        there is a mismatch between the number of column names and the number
+        of values per data row.
+        """
+        data: Iterable[list[list]] = self._get_usages(
             liveboard_name=_OBJECT_USAGE_LIVEBOARD,
             visualization_name=_ANSWER_USAGE_VIZ,
         )
+        columns = (
+            "Answer name",
+            "Number of unique users",
+            "Count of object interactions",
+        )
+        for row in data:
+            yield dict(zip(columns, row))
 
     def _get_liveboards_usages(self) -> Iterator[dict]:
-        return self._get_usages(
+        """
+        Returns the usage data of Liveboards, which is found in a visualization
+        of the "User Adoption" liveboard.
+        Each data row returned by the API is transformed from a list into a dictionary.
+        See `_get_answer_usages` regarding the columns list.
+        """
+        data: Iterable[list[list]] = self._get_usages(
             liveboard_name=_USER_ADOPTION_LIVEBOARD,
             visualization_name=_LIVEBOARD_USAGE_VIZ,
         )
+        columns = ("Pinboard", "Unique Number of User", "Pinboard Views")
+        for row in data:
+            yield dict(zip(columns, row))
 
     def fetch(self, asset: ThoughtspotAsset) -> Iterator[dict]:
         if asset == ThoughtspotAsset.ANSWERS:
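
The explicit column tuples in `_get_answer_usages` and `_get_liveboards_usages` work because `zip` stops at the shortest of its inputs, so any surplus values in a data row are dropped silently instead of raising. A minimal illustration (the row values here are invented for the example):

```python
# zip() pairs items only up to the shorter iterable, so a row with more
# values than there are column names loses the surplus instead of failing.
columns = ("Answer name", "Number of unique users", "Count of object interactions")
row = ["My saved answer", 4, 128, "surplus value"]  # hypothetical API row

print(dict(zip(columns, row)))
# {'Answer name': 'My saved answer', 'Number of unique users': 4,
#  'Count of object interactions': 128}
```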

castor_extractor/visualization/thoughtspot/client/endpoints.py CHANGED
@@ -8,5 +8,5 @@ class ThoughtspotEndpointFactory:
         return "api/rest/2.0/metadata/search"
 
     @classmethod
-    def liveboard(cls) -> str:
-        return "api/rest/2.0/report/liveboard"
+    def liveboard_data(cls) -> str:
+        return "api/rest/2.0/metadata/liveboard/data"

castor_extractor/visualization/thoughtspot/client/pagination.py ADDED
@@ -0,0 +1,25 @@
+from pydantic import ConfigDict, Field
+
+from ....utils import PaginationModel
+
+METADATA_BATCH_SIZE = 100
+
+
+class ThoughtSpotPagination(PaginationModel):
+    data_rows: list = Field(default_factory=list)
+    record_offset: int
+    record_size: int
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        from_attributes=True,
+    )
+
+    def is_last(self) -> bool:
+        return len(self.data_rows) < METADATA_BATCH_SIZE
+
+    def next_page_payload(self) -> dict:
+        return {"record_offset": self.record_offset + METADATA_BATCH_SIZE}
+
+    def page_results(self) -> list:
+        return self.data_rows
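
The `fetch_all_pages` helper that drives this model is imported from the package's shared `utils` and is not part of this diff. A minimal sketch of the loop it plausibly implements, inferred from the `is_last` / `next_page_payload` / `page_results` contract above (the helper's actual signature and its pydantic parsing are assumptions, not the package's real code):

```python
from typing import Any, Callable, Iterator

from pydantic import BaseModel


def fetch_all_pages(
    request: Callable[..., dict],
    model: type[BaseModel],
) -> Iterator[Any]:
    # Sketch only: keep requesting pages, bumping the offset each time,
    # until a page comes back shorter than the batch size.
    overrides: dict = {}
    while True:
        page = model.model_validate(request(**overrides))  # pydantic v2 parsing
        yield from page.page_results()  # emit this page's data_rows
        if page.is_last():  # fewer rows than METADATA_BATCH_SIZE: done
            return
        overrides = page.next_page_payload()  # e.g. {"record_offset": 200}
```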

castor_extractor/warehouse/snowflake/queries/column.sql CHANGED
@@ -47,7 +47,9 @@ FROM snowflake.account_usage.columns AS c
 JOIN snowflake.account_usage.tables AS t ON t.table_id = c.table_id
 JOIN tags_agg_columns ta ON c.column_id = ta.column_id
 WHERE TRUE
-    AND COALESCE(c.column_name, '') != ''
+    AND TRIM(COALESCE(c.column_name, '')) != ''
+    AND TRIM(COALESCE(t.table_name, '')) != ''
+    AND TRIM(COALESCE(s.schema_name, '')) != ''
     AND UPPER(c.table_catalog) NOT IN ('SNOWFLAKE', 'UTIL_DB')
     AND (
         c.deleted IS NULL

castor_extractor/warehouse/snowflake/queries/query.sql CHANGED
@@ -51,20 +51,28 @@ WHERE TRUE
     AND HOUR(CONVERT_TIMEZONE('UTC', start_time)) BETWEEN :hour_min AND :hour_max
     AND execution_status = 'SUCCESS'
     AND query_text != 'SELECT 1'
+    AND TRIM(COALESCE(query_text, '')) != ''
     AND query_type NOT IN (
-        'SHOW',
-        'USE',
-        'ROLLBACK',
-        'DESCRIBE',
         'ALTER_SESSION',
-        'PUT_FILES',
+        'BEGIN_TRANSACTION',
+        'CALL',
+        'COMMENT',
+        'COMMIT',
         'CREATE', -- create objects: stage|function|schema|procedure|file|storage|pipe|notification integration
-        'SET',
+        'DESCRIBE',
+        'DROP',
+        'EXPLAIN',
+        'GET_FILES',
         'GRANT',
-        'COMMIT',
+        'PUT_FILES',
+        'REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION',
+        'REMOVE_FILES',
+        'REVOKE',
+        'ROLLBACK',
+        'SET',
+        'SHOW',
+        'TRUNCATE_TABLE',
+        'UNDROP',
         'UNLOAD',
-        'GET_FILES',
-        'DROP',
-        'BEGIN_TRANSACTION',
-        'REMOVE_FILES'
+        'USE'
     )

castor_extractor/warehouse/snowflake/queries/schema.sql CHANGED
@@ -16,6 +16,7 @@ WHERE TRUE
         deleted IS NULL
         OR deleted > CURRENT_TIMESTAMP - INTERVAL '1 day'
     )
+    AND TRIM(COALESCE(schema_name, '')) != ''
     {database_allowed}
     {database_blocked}
     AND CASE {has_fetch_transient} WHEN FALSE THEN NOT s.is_transient::BOOLEAN ELSE TRUE END

castor_extractor/warehouse/snowflake/queries/table.sql CHANGED
@@ -41,8 +41,8 @@ FROM snowflake.account_usage.tables AS t
 JOIN snowflake.account_usage.schemata AS s ON s.schema_id = t.table_schema_id
 JOIN tags_agg_tables ta ON t.table_id = ta.table_id
 WHERE TRUE
-    AND t.table_name IS NOT NULL
-    AND t.table_name != ''
+    AND TRIM(COALESCE(t.table_name, '')) != ''
+    AND TRIM(COALESCE(s.schema_name, '')) != ''
     AND UPPER(t.table_catalog) NOT IN ('SNOWFLAKE', 'UTIL_DB')
     AND (
         t.deleted IS NULL

castor_extractor-{0.24.2 → 0.24.4}.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.2
+Version: 0.24.4
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -210,6 +210,18 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
 
 # Changelog
 
+## 0.24.4 - 2025-03-19
+
+* Snowflake:
+  * improve the list of ignored queries in the query history extraction
+    * ignore the following query types : CALL, COMMENT, EXPLAIN, REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION, REVOKE, TRUNCATE_TABLE, UNDROP
+    * ignore queries with empty text
+  * filter out schemas with empty names
+
+## 0.24.3 - 2025-03-18
+
+* Replace ThoughtSpot endpoint `/api/rest/2.0/report/liveboard` with `/api/rest/2.0/metadata/liveboard/data` following the deprecation of the CSV option
+
 ## 0.24.2 - 2025-03-17
 
 * Rename Revamped Tableau Connector classes

castor_extractor-{0.24.2 → 0.24.4}.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=8iEypB0lozhyFumiedys3lbpowlX3HXCPnK-3QvjueE,15884
+CHANGELOG.md,sha256=1Y5FmmQDspwZaOhKjnJosP2sNd898LeTOmVIMTBt9Bw,16387
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -276,11 +276,10 @@ castor_extractor/visualization/tableau/extract.py,sha256=FnjmmUdNA9MEf3S5Tw37x6Z
 castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
 castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
 castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
-castor_extractor/visualization/thoughtspot/client/client.py,sha256=mtwMCPI1-1tyZb1gSYYr-O2QZMTFQwNgillU6ycsOU4,5552
+castor_extractor/visualization/thoughtspot/client/client.py,sha256=lRNkigPV2MTozgBzFkij7mCXMMRqXzPtNs8EEi_f3tk,7127
 castor_extractor/visualization/thoughtspot/client/credentials.py,sha256=fp4YHiZy-dstWiLr5c4kFU9SyPK5rd2nCeh8k5sVRpM,462
-castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=u3FRkmG6j5OIMEeXWZcgRObP8JeC4EutIJEeitNV44c,330
-castor_extractor/visualization/thoughtspot/client/utils.py,sha256=3LgbIWoG1e39VW8rYaV4ot_0EFipziwf3rFAZKxrlEY,1072
-castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256=2XysRU7a58KA2JgNwU2j4GPrN0rkN7Gvk8kQCJlYXVk,2469
+castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=XLDGs7v2e2S2VdJX8cQjMh80KNCHb_H5A9I8ejP1ZPs,342
+castor_extractor/visualization/thoughtspot/client/pagination.py,sha256=iosYUJ7ZMT1G_Jm6AXPwczYnXFzS6Yez-B9-tRFiV_w,619
 castor_extractor/visualization/thoughtspot/extract.py,sha256=mcXS0jGFpa50td98AVbbTqxchyI5wDCpB-v1o5iRc3g,1354
 castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
@@ -380,16 +379,16 @@ castor_extractor/warehouse/snowflake/credentials.py,sha256=u0sZ6xPtcZmmvnUsAejJk
 castor_extractor/warehouse/snowflake/credentials_test.py,sha256=Lkc-DHXOvr50KrqAW4nt_x0IA0Mu_CsBVu6ATnzQB6I,673
 castor_extractor/warehouse/snowflake/extract.py,sha256=3yc9kcVtt2c1uWJOJJgeZchV4VmRr9EeYM3W6gl8zQQ,3201
 castor_extractor/warehouse/snowflake/queries/.sqlfluff,sha256=vttrwcr64JVIuvc7WIg9C54cbOkjg_VjXNR7YnTGOPE,31
-castor_extractor/warehouse/snowflake/queries/column.sql,sha256=Wy-arvS_3Dh0HFrzdpRmBsI58mMlN_5U097s5kMNluQ,1781
+castor_extractor/warehouse/snowflake/queries/column.sql,sha256=Ru-yC0s76I9LehOA4aCZ--xz6D9H1Hyr3OZdILOBHAw,1882
 castor_extractor/warehouse/snowflake/queries/column_lineage.sql,sha256=YKBiZ6zySSNcXLDXwm31EjGIIkkkZc0-S6hI1SRM80o,1179
 castor_extractor/warehouse/snowflake/queries/database.sql,sha256=ifZXoKUXtsrGOxml6AcNhA4yybIyatH5va7bcp-lgCU,483
 castor_extractor/warehouse/snowflake/queries/function.sql,sha256=8LRh0ybhd-RldJ8UZspWUm3yv52evq11O2uqIO4KqeQ,372
 castor_extractor/warehouse/snowflake/queries/grant_to_role.sql,sha256=O7AJ1LzoXGDFmiVvQ8EMJ5x8FSAnaxRPdmRyAlEmkUM,272
 castor_extractor/warehouse/snowflake/queries/grant_to_user.sql,sha256=7AalVajU5vRRpIiys1igSwmDXirbwpMTvJr2ihSz2NE,143
-castor_extractor/warehouse/snowflake/queries/query.sql,sha256=-OYcWUvdPBkpOfezkZaW7hrOdDz3JyoqjNdRm_88Rsk,1779
+castor_extractor/warehouse/snowflake/queries/query.sql,sha256=w4T6-TgwUozDgaF3Fk-qex7bDdEIHLkkB5XEe2VJXZQ,1992
 castor_extractor/warehouse/snowflake/queries/role.sql,sha256=D0VvGxLZMwug2SvefhAsNR9YIun0fZvcDWkz891xSYM,96
-castor_extractor/warehouse/snowflake/queries/schema.sql,sha256=HCDEw0Nj_GPHBNH3Ik_5BF4rkD5yBfSyeN9UaiFGrI4,730
-castor_extractor/warehouse/snowflake/queries/table.sql,sha256=qTwkAJ7-kM8vX03RP16U_5_euWW5ZTQAKuiLPsbj2hs,1438
+castor_extractor/warehouse/snowflake/queries/schema.sql,sha256=iLn6_y5rn63KigjE4GEAMp8ZuZZofhMXYGb8saPDGUc,776
+castor_extractor/warehouse/snowflake/queries/table.sql,sha256=CbSLfJAylyyyD3mkGPSLLE7BHrGjlY499kzO9RN0e4Y,1473
 castor_extractor/warehouse/snowflake/queries/user.sql,sha256=88V8eRj1NDaD_ufclsKOHHlqCtBMQHOV54yy6RKJaXk,570
 castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3N7BKkbXVM3RoIzqSDtohqRnE5kg4,673
 castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
@@ -404,8 +403,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
 castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.2.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
-castor_extractor-0.24.2.dist-info/METADATA,sha256=FNJlgmFPbgSmHoVwHx-hXj9rvHYw2wctlcEXeGck52I,23040
-castor_extractor-0.24.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-castor_extractor-0.24.2.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
-castor_extractor-0.24.2.dist-info/RECORD,,
+castor_extractor-0.24.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.4.dist-info/METADATA,sha256=eY2TPP3IDq9an2JJzoZcN-_rG5DJIGzbJOqEtGBhzd4,23543
+castor_extractor-0.24.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.4.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
+castor_extractor-0.24.4.dist-info/RECORD,,

castor_extractor/visualization/thoughtspot/client/utils.py DELETED
@@ -1,31 +0,0 @@
-import csv
-import re
-from collections.abc import Iterator
-from io import StringIO
-
-_END_OF_GENERATED_TEXT = r'^""$'
-
-
-def usage_liveboard_reader(usage_liveboard_csv: str) -> Iterator[dict]:
-    """
-    Converts a CSV string into an iterator of dictionaries after
-    ignoring the generated text that preceeds the actual CSV header row.
-    The generated block ends with a row containing only two double quotes.
-    Here is an example:
-
-    "Data extract produced by Castor on 09/19/2024 06:54"
-    "Filters applied on data :"
-    "User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
-    "Pinboard NOT IN [mlm - availability pinboard,null]"
-    "Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
-    "Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
-    ""
-
-    """
-    csv_file = StringIO(usage_liveboard_csv)
-
-    line = next(csv_file)
-    while not re.match(_END_OF_GENERATED_TEXT, line.strip()):
-        line = next(csv_file)
-
-    yield from csv.DictReader(csv_file)

castor_extractor/visualization/thoughtspot/client/utils_test.py DELETED
@@ -1,75 +0,0 @@
-from .utils import (
-    usage_liveboard_reader,
-)
-
-VALID_CSV_1 = '''"Data extract produced by Castor on 09/19/2024 06:54"
-"Filters applied on data :"
-"User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
-"Pinboard NOT IN [mlm - availability pinboard,null]"
-"Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
-"Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
-""
-"Pinboard","Pinboard Views","Unique Number of User"
-"Market Report","559","19"
-"Retailer report","204","14"
-"Second-hand market","72","6"
-"September test","25","2"'''
-
-
-VALID_CSV_2 = '''"Data extract produced by Castor on 01/07/2025 16:07"
-"Filters applied on data :"
-"Timestamp >= 20241208 00:00:00 < 20250107 00:00:00"
-""
-"Answer name","User name","Number of unique users","Count of object interactions"
-"toto","tata","1","666"'''
-
-# Invalid CSV input (missing data rows)
-INVALID_CSV = '''"Data extract produced by Castor on 09/19/2024 06:54"
-"Filters applied on data :"
-"User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
-"Pinboard NOT IN [mlm - availability pinboard,null]"
-"Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
-"Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
-""'''
-
-
-def test_usage_liveboard_reader():
-    expected_output_1 = [
-        {
-            "Pinboard": "Market Report",
-            "Pinboard Views": "559",
-            "Unique Number of User": "19",
-        },
-        {
-            "Pinboard": "Retailer report",
-            "Pinboard Views": "204",
-            "Unique Number of User": "14",
-        },
-        {
-            "Pinboard": "Second-hand market",
-            "Pinboard Views": "72",
-            "Unique Number of User": "6",
-        },
-        {
-            "Pinboard": "September test",
-            "Pinboard Views": "25",
-            "Unique Number of User": "2",
-        },
-    ]
-    expected_output_2 = [
-        {
-            "Answer name": "toto",
-            "User name": "tata",
-            "Number of unique users": "1",
-            "Count of object interactions": "666",
-        }
-    ]
-
-    result = list(usage_liveboard_reader(VALID_CSV_1))
-    assert result == expected_output_1
-
-    result = list(usage_liveboard_reader(VALID_CSV_2))
-    assert result == expected_output_2
-
-    result = list(usage_liveboard_reader(INVALID_CSV))
-    assert result == []  # Expect an empty result since there is no data