gooddata-pandas 1.47.0__py3-none-any.whl → 1.55.1.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gooddata_pandas/dataframe.py +18 -0
- gooddata_pandas/result_convertor.py +316 -49
- {gooddata_pandas-1.47.0.dist-info → gooddata_pandas-1.55.1.dev2.dist-info}/METADATA +11 -25
- gooddata_pandas-1.55.1.dev2.dist-info/RECORD +13 -0
- {gooddata_pandas-1.47.0.dist-info → gooddata_pandas-1.55.1.dev2.dist-info}/WHEEL +1 -2
- gooddata_pandas-1.47.0.dist-info/RECORD +0 -14
- gooddata_pandas-1.47.0.dist-info/top_level.txt +0 -1
- {gooddata_pandas-1.47.0.dist-info → gooddata_pandas-1.55.1.dev2.dist-info}/licenses/LICENSE.txt +0 -0
gooddata_pandas/dataframe.py
CHANGED
|
@@ -238,6 +238,7 @@ class DataFrameFactory:
|
|
|
238
238
|
created_visualizations_response: dict,
|
|
239
239
|
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
|
|
240
240
|
is_cancellable: bool = False,
|
|
241
|
+
optimized: bool = False,
|
|
241
242
|
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
|
|
242
243
|
"""
|
|
243
244
|
Creates a data frame using a created visualization.
|
|
@@ -247,6 +248,10 @@ class DataFrameFactory:
|
|
|
247
248
|
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
|
|
248
249
|
submitted to the backend.
|
|
249
250
|
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
|
|
251
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
252
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
253
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
254
|
+
which can significantly reduce memory usage.
|
|
250
255
|
|
|
251
256
|
Returns:
|
|
252
257
|
pandas.DataFrame: A DataFrame instance.
|
|
@@ -257,6 +262,7 @@ class DataFrameFactory:
|
|
|
257
262
|
return self.for_exec_def(
|
|
258
263
|
exec_def=execution_definition,
|
|
259
264
|
on_execution_submitted=on_execution_submitted,
|
|
265
|
+
optimized=optimized,
|
|
260
266
|
)
|
|
261
267
|
|
|
262
268
|
def result_cache_metadata_for_exec_result_id(self, result_id: str) -> ResultCacheMetadata:
|
|
@@ -279,6 +285,7 @@ class DataFrameFactory:
|
|
|
279
285
|
result_size_bytes_limit: Optional[int] = None,
|
|
280
286
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
281
287
|
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
|
|
288
|
+
optimized: bool = False,
|
|
282
289
|
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
|
|
283
290
|
"""
|
|
284
291
|
Creates a data frame using an execution definition.
|
|
@@ -311,6 +318,10 @@ class DataFrameFactory:
|
|
|
311
318
|
page_size (int): Number of records per page.
|
|
312
319
|
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
|
|
313
320
|
submitted to the backend.
|
|
321
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
322
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
323
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
324
|
+
which can significantly reduce memory usage.
|
|
314
325
|
|
|
315
326
|
Returns:
|
|
316
327
|
Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
|
|
@@ -331,6 +342,7 @@ class DataFrameFactory:
|
|
|
331
342
|
result_size_dimensions_limits=result_size_dimensions_limits,
|
|
332
343
|
result_size_bytes_limit=result_size_bytes_limit,
|
|
333
344
|
page_size=page_size,
|
|
345
|
+
optimized=optimized,
|
|
334
346
|
)
|
|
335
347
|
|
|
336
348
|
def for_exec_result_id(
|
|
@@ -343,6 +355,7 @@ class DataFrameFactory:
|
|
|
343
355
|
use_local_ids_in_headers: bool = False,
|
|
344
356
|
use_primary_labels_in_attributes: bool = False,
|
|
345
357
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
358
|
+
optimized: bool = False,
|
|
346
359
|
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
|
|
347
360
|
"""
|
|
348
361
|
Retrieves a DataFrame and DataFrame metadata for a given execution result identifier.
|
|
@@ -373,6 +386,10 @@ class DataFrameFactory:
|
|
|
373
386
|
use_local_ids_in_headers (bool): Use local identifier in headers.
|
|
374
387
|
use_primary_labels_in_attributes (bool): Use primary labels in attributes.
|
|
375
388
|
page_size (int): Number of records per page.
|
|
389
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
390
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
391
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
392
|
+
which can significantly reduce memory usage.
|
|
376
393
|
|
|
377
394
|
Returns:
|
|
378
395
|
Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
|
|
@@ -398,4 +415,5 @@ class DataFrameFactory:
|
|
|
398
415
|
use_local_ids_in_headers=use_local_ids_in_headers,
|
|
399
416
|
use_primary_labels_in_attributes=use_primary_labels_in_attributes,
|
|
400
417
|
page_size=page_size,
|
|
418
|
+
optimized=optimized,
|
|
401
419
|
)
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
# (C) 2022 GoodData Corporation
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from collections.abc import Iterator
|
|
4
|
+
from functools import cached_property
|
|
2
5
|
from typing import Any, Callable, Optional, Union, cast
|
|
3
6
|
|
|
4
7
|
import pandas
|
|
@@ -11,6 +14,163 @@ _DataArray = list[Union[int, None]]
|
|
|
11
14
|
LabelOverrides = dict[str, dict[str, dict[str, str]]]
|
|
12
15
|
|
|
13
16
|
|
|
17
|
+
@define(frozen=True, slots=True)
|
|
18
|
+
class _Header(ABC):
|
|
19
|
+
"""
|
|
20
|
+
Abstract base class for headers. There are 4 types of headers:
|
|
21
|
+
- attribute header with attribute value and primary label value
|
|
22
|
+
- attribute header with label name and label identifier
|
|
23
|
+
- measure header
|
|
24
|
+
- total header
|
|
25
|
+
|
|
26
|
+
We convert dict representation to _Header objects with slots to improve memory usage.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
@cached_property
|
|
30
|
+
@abstractmethod
|
|
31
|
+
def _dict(self) -> dict[str, Any]:
|
|
32
|
+
pass
|
|
33
|
+
|
|
34
|
+
def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
|
|
35
|
+
return self._dict.get(key, default)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@define(frozen=True, slots=True)
|
|
39
|
+
class _AttributeValuePrimary(_Header):
|
|
40
|
+
"""
|
|
41
|
+
Attribute header with label value and primary label value.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
label_value: str
|
|
45
|
+
primary_label_value: str
|
|
46
|
+
|
|
47
|
+
@cached_property
|
|
48
|
+
def _dict(self) -> dict[str, Any]:
|
|
49
|
+
return {"attributeHeader": {"labelValue": self.label_value, "primaryLabelValue": self.primary_label_value}}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@define(frozen=True, slots=True)
|
|
53
|
+
class _AttributeNameLocal(_Header):
|
|
54
|
+
"""
|
|
55
|
+
Attribute header with label name and label identifier.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
label_name: str
|
|
59
|
+
local_identifier: str
|
|
60
|
+
|
|
61
|
+
@cached_property
|
|
62
|
+
def _dict(self) -> dict[str, Any]:
|
|
63
|
+
return {"attributeHeader": {"labelName": self.label_name, "localIdentifier": self.local_identifier}}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@define(frozen=True, slots=True)
|
|
67
|
+
class _MeasureHeader(_Header):
|
|
68
|
+
"""
|
|
69
|
+
Measure header.
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
measure_index: str
|
|
73
|
+
|
|
74
|
+
@cached_property
|
|
75
|
+
def _dict(self) -> dict[str, Any]:
|
|
76
|
+
return {"measureHeader": {"measureIndex": self.measure_index}}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@define(frozen=True, slots=True)
|
|
80
|
+
class _TotalHeader(_Header):
|
|
81
|
+
"""
|
|
82
|
+
Total header.
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
function: str
|
|
86
|
+
|
|
87
|
+
@cached_property
|
|
88
|
+
def _dict(self) -> dict[str, Any]:
|
|
89
|
+
return {"totalHeader": {"function": self.function}}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _header_from_dict(d: dict[str, Any]) -> Optional[_Header]:
|
|
93
|
+
"""
|
|
94
|
+
Convert dict representation to _Header object.
|
|
95
|
+
:param d: dictionary representation of a header
|
|
96
|
+
:return: _Header object or None if the dictionary does not represent a header or if it's not supported.
|
|
97
|
+
However, we expect that all execution results contain correct data.
|
|
98
|
+
"""
|
|
99
|
+
if attribute_header := d.get("attributeHeader"):
|
|
100
|
+
if "labelValue" in attribute_header:
|
|
101
|
+
return _AttributeValuePrimary(
|
|
102
|
+
label_value=attribute_header["labelValue"], primary_label_value=attribute_header["primaryLabelValue"]
|
|
103
|
+
)
|
|
104
|
+
if "labelName" in attribute_header:
|
|
105
|
+
return _AttributeNameLocal(
|
|
106
|
+
label_name=attribute_header["labelName"], local_identifier=attribute_header["localIdentifier"]
|
|
107
|
+
)
|
|
108
|
+
return None
|
|
109
|
+
|
|
110
|
+
if measure_header := d.get("measureHeader"):
|
|
111
|
+
return _MeasureHeader(measure_header["measureIndex"])
|
|
112
|
+
|
|
113
|
+
if total_header := d.get("totalHeader"):
|
|
114
|
+
return _TotalHeader(total_header["function"])
|
|
115
|
+
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@define
|
|
120
|
+
class _HeaderContainer:
|
|
121
|
+
"""
|
|
122
|
+
Container for headers to improve memory usage.
|
|
123
|
+
Unique headers are stored as keys in _header_cache and references to them are stored in _headers.
|
|
124
|
+
This way we avoid storing the same header multiple times, reducing memory allocations,
|
|
125
|
+
which is important for large datatables with many attributes.
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
_headers: list[_Header] = field(factory=list)
|
|
129
|
+
_header_cache: dict[_Header, _Header] = field(factory=dict)
|
|
130
|
+
|
|
131
|
+
def append(self, header_dict: dict) -> None:
|
|
132
|
+
"""
|
|
133
|
+
Add header to the container.
|
|
134
|
+
|
|
135
|
+
First, try to convert header dict to _Header object, and return early if it's not possible.
|
|
136
|
+
Then, check if the header is already in the container.
|
|
137
|
+
If it is, get a pointer to the existing header and add it to the container.
|
|
138
|
+
If it is not, add it to the container.
|
|
139
|
+
"""
|
|
140
|
+
|
|
141
|
+
header = _header_from_dict(header_dict)
|
|
142
|
+
if header is None:
|
|
143
|
+
return
|
|
144
|
+
|
|
145
|
+
if header not in self._header_cache:
|
|
146
|
+
self._header_cache[header] = header
|
|
147
|
+
self._headers.append(self._header_cache[header])
|
|
148
|
+
|
|
149
|
+
def extend(self, header_dicts: list[dict]) -> None:
|
|
150
|
+
"""
|
|
151
|
+
Add multiple headers to the container.
|
|
152
|
+
"""
|
|
153
|
+
for header_dict in header_dicts:
|
|
154
|
+
self.append(header_dict)
|
|
155
|
+
|
|
156
|
+
def __iter__(self) -> Iterator[_Header]:
|
|
157
|
+
yield from self._headers
|
|
158
|
+
|
|
159
|
+
def __len__(self) -> int:
|
|
160
|
+
return len(self._headers)
|
|
161
|
+
|
|
162
|
+
def __getitem__(self, index: int) -> _Header:
|
|
163
|
+
return self._headers[index]
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
_DataHeaderContainers = list[_HeaderContainer]
|
|
167
|
+
|
|
168
|
+
# Optimized version of _DataWithHeaders uses _HeaderContainer instead of list of headers
|
|
169
|
+
_HeadersByAxis = tuple[
|
|
170
|
+
Union[_DataHeaders, _DataHeaderContainers], Union[Optional[_DataHeaders], Optional[_DataHeaderContainers]]
|
|
171
|
+
]
|
|
172
|
+
|
|
173
|
+
|
|
14
174
|
@frozen
|
|
15
175
|
class _DataWithHeaders:
|
|
16
176
|
"""Extracted data; either array of values for one-dimensional result or array of arrays of values.
|
|
@@ -18,7 +178,7 @@ class _DataWithHeaders:
|
|
|
18
178
|
Attributes:
|
|
19
179
|
data (List[_DataArray]):
|
|
20
180
|
Extracted data; either array of values for one-dimensional result or array of arrays of values.
|
|
21
|
-
data_headers (
|
|
181
|
+
data_headers (_HeadersByAxis):
|
|
22
182
|
Per-dimension headers for the data.
|
|
23
183
|
grand_totals (Tuple[Optional[List[_DataArray]], Optional[List[_DataArray]]]):
|
|
24
184
|
Per-dimension grand total data.
|
|
@@ -27,32 +187,34 @@ class _DataWithHeaders:
|
|
|
27
187
|
"""
|
|
28
188
|
|
|
29
189
|
data: list[_DataArray]
|
|
30
|
-
data_headers:
|
|
190
|
+
data_headers: _HeadersByAxis
|
|
31
191
|
grand_totals: tuple[Optional[list[_DataArray]], Optional[list[_DataArray]]]
|
|
32
192
|
grand_total_headers: tuple[Optional[list[dict[str, _DataHeaders]]], Optional[list[dict[str, _DataHeaders]]]]
|
|
33
193
|
|
|
34
194
|
|
|
35
195
|
@define
|
|
36
|
-
class
|
|
196
|
+
class _AbstractAccumulatedData(ABC):
|
|
37
197
|
"""
|
|
38
198
|
Utility class to offload code from the function that extracts all data and headers for a
|
|
39
199
|
particular paged result. The method drives the paging and calls out to this class to accumulate
|
|
40
200
|
the essential data and headers from the page.
|
|
201
|
+
Note that if optimized is enabled, the data_headers are stored in _HeaderContainer instead of list of headers.
|
|
202
|
+
We do not store grand_totals_headers in _HeaderContainer, as we do not except
|
|
41
203
|
|
|
42
204
|
Attributes:
|
|
43
205
|
data (List[_DataArray]): Holds the accumulated data arrays from the pages.
|
|
44
|
-
data_headers (List[Optional[
|
|
206
|
+
data_headers (List[Optional[Any]]): Holds the headers for data arrays.
|
|
45
207
|
grand_totals (List[Optional[List[_DataArray]]]): Holds the grand total data arrays.
|
|
46
208
|
grand_totals_headers (List[Optional[_DataHeaders]]): Holds the headers for grand total data arrays.
|
|
47
209
|
"""
|
|
48
210
|
|
|
49
211
|
data: list[_DataArray] = field(init=False, factory=list)
|
|
50
|
-
data_headers: list[Optional[
|
|
212
|
+
data_headers: list[Optional[Any]] = field(init=False, factory=lambda: [None, None])
|
|
51
213
|
grand_totals: list[Optional[list[_DataArray]]] = field(init=False, factory=lambda: [None, None])
|
|
214
|
+
total_of_grant_totals_processed: bool = field(init=False, default=False)
|
|
52
215
|
grand_totals_headers: list[Optional[list[dict[str, _DataHeaders]]]] = field(
|
|
53
216
|
init=False, factory=lambda: [None, None]
|
|
54
217
|
)
|
|
55
|
-
total_of_grant_totals_processed: bool = field(init=False, default=False)
|
|
56
218
|
|
|
57
219
|
def accumulate_data(self, from_result: ExecutionResult) -> None:
|
|
58
220
|
"""
|
|
@@ -79,24 +241,6 @@ class _AccumulatedData:
|
|
|
79
241
|
for i in range(len(from_result.data)):
|
|
80
242
|
self.data[offset + i].extend(from_result.data[i])
|
|
81
243
|
|
|
82
|
-
def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
|
|
83
|
-
"""
|
|
84
|
-
Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
|
|
85
|
-
matching the dimension index.
|
|
86
|
-
|
|
87
|
-
This will mutate the `data_headers`.
|
|
88
|
-
|
|
89
|
-
Args:
|
|
90
|
-
from_result (ExecutionResult): The result whose headers will be accumulated.
|
|
91
|
-
from_dim (int): The dimension index.
|
|
92
|
-
"""
|
|
93
|
-
|
|
94
|
-
if self.data_headers[from_dim] is None:
|
|
95
|
-
self.data_headers[from_dim] = from_result.get_all_headers(dim=from_dim)
|
|
96
|
-
else:
|
|
97
|
-
for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
|
|
98
|
-
cast(_DataHeaders, self.data_headers[from_dim])[idx].extend(headers)
|
|
99
|
-
|
|
100
244
|
def accumulate_grand_totals(
|
|
101
245
|
self, from_result: ExecutionResult, paging_dim: int, response: BareExecutionResponse
|
|
102
246
|
) -> None:
|
|
@@ -161,6 +305,56 @@ class _AccumulatedData:
|
|
|
161
305
|
# have row totals and paging down, keep adding extra rows
|
|
162
306
|
grand_totals_item.extend(grand_total["data"])
|
|
163
307
|
|
|
308
|
+
@abstractmethod
|
|
309
|
+
def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
|
|
310
|
+
"""
|
|
311
|
+
Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
|
|
312
|
+
matching the dimension index.
|
|
313
|
+
|
|
314
|
+
This will mutate the `data_headers`.
|
|
315
|
+
|
|
316
|
+
Args:
|
|
317
|
+
from_result (ExecutionResult): The result whose headers will be accumulated.
|
|
318
|
+
from_dim (int): The dimension index.
|
|
319
|
+
"""
|
|
320
|
+
|
|
321
|
+
@abstractmethod
|
|
322
|
+
def result(self) -> _DataWithHeaders:
|
|
323
|
+
"""
|
|
324
|
+
Returns the data with headers.
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
_DataWithHeaders: The data, data headers, grand totals and grand total headers.
|
|
328
|
+
"""
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
@define
|
|
332
|
+
class _AccumulatedData(_AbstractAccumulatedData):
|
|
333
|
+
"""
|
|
334
|
+
Implementation of _AbstractAccumulatedData that uses list of dicts as storage,
|
|
335
|
+
which is used when non-optimized data extraction is used.
|
|
336
|
+
|
|
337
|
+
This implementation may lead to uncontrolled memory usage for large results.
|
|
338
|
+
"""
|
|
339
|
+
|
|
340
|
+
def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
|
|
341
|
+
"""
|
|
342
|
+
Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
|
|
343
|
+
matching the dimension index.
|
|
344
|
+
|
|
345
|
+
This will mutate the `data_headers`.
|
|
346
|
+
|
|
347
|
+
Args:
|
|
348
|
+
from_result (ExecutionResult): The result whose headers will be accumulated.
|
|
349
|
+
from_dim (int): The dimension index.
|
|
350
|
+
"""
|
|
351
|
+
|
|
352
|
+
if self.data_headers[from_dim] is None:
|
|
353
|
+
self.data_headers[from_dim] = from_result.get_all_headers(dim=from_dim)
|
|
354
|
+
else:
|
|
355
|
+
for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
|
|
356
|
+
cast(_DataHeaders, self.data_headers[from_dim])[idx].extend(headers)
|
|
357
|
+
|
|
164
358
|
def result(self) -> _DataWithHeaders:
|
|
165
359
|
"""
|
|
166
360
|
Returns the data with headers.
|
|
@@ -176,6 +370,55 @@ class _AccumulatedData:
|
|
|
176
370
|
)
|
|
177
371
|
|
|
178
372
|
|
|
373
|
+
@define
|
|
374
|
+
class _OptimizedAccumulatedData(_AbstractAccumulatedData):
|
|
375
|
+
"""
|
|
376
|
+
Implementation of _AbstractAccumulatedData that stores headers in _HeaderContainer objects,
|
|
377
|
+
which is used when optimized data extraction is used.
|
|
378
|
+
|
|
379
|
+
This implementation is more memory efficient than _AccumulatedData.
|
|
380
|
+
"""
|
|
381
|
+
|
|
382
|
+
def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
|
|
383
|
+
"""
|
|
384
|
+
Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
|
|
385
|
+
matching the dimension index.
|
|
386
|
+
|
|
387
|
+
This will mutate the `data_headers`.
|
|
388
|
+
|
|
389
|
+
Args:
|
|
390
|
+
from_result (ExecutionResult): The result whose headers will be accumulated.
|
|
391
|
+
from_dim (int): The dimension index.
|
|
392
|
+
"""
|
|
393
|
+
|
|
394
|
+
if containers := self.data_headers[from_dim]:
|
|
395
|
+
for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
|
|
396
|
+
containers[idx].extend(headers)
|
|
397
|
+
else:
|
|
398
|
+
self.data_headers[from_dim] = []
|
|
399
|
+
containers = []
|
|
400
|
+
for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
|
|
401
|
+
hc = _HeaderContainer()
|
|
402
|
+
hc.extend(headers)
|
|
403
|
+
containers.append(hc)
|
|
404
|
+
self.data_headers[from_dim] = containers
|
|
405
|
+
|
|
406
|
+
def result(self) -> _DataWithHeaders:
|
|
407
|
+
"""
|
|
408
|
+
Returns the data with headers.
|
|
409
|
+
|
|
410
|
+
Returns:
|
|
411
|
+
_DataWithHeaders: The data, data headers, grand totals and grand total headers.
|
|
412
|
+
"""
|
|
413
|
+
|
|
414
|
+
return _DataWithHeaders(
|
|
415
|
+
data=self.data,
|
|
416
|
+
data_headers=(cast(_DataHeaderContainers, self.data_headers[0]), self.data_headers[1]),
|
|
417
|
+
grand_totals=(self.grand_totals[0], self.grand_totals[1]),
|
|
418
|
+
grand_total_headers=(self.grand_totals_headers[0], self.grand_totals_headers[1]),
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
|
|
179
422
|
@define
|
|
180
423
|
class DataFrameMetadata:
|
|
181
424
|
"""
|
|
@@ -194,11 +437,13 @@ class DataFrameMetadata:
|
|
|
194
437
|
| AVG | 150
|
|
195
438
|
SUM | | 450
|
|
196
439
|
|
|
440
|
+
column_totals_indexes: Similar to row_totals_indexes but for column headers.
|
|
197
441
|
execution_response: An instance of BareExecutionResponse representing the
|
|
198
442
|
execution response.
|
|
199
443
|
"""
|
|
200
444
|
|
|
201
445
|
row_totals_indexes: list[list[int]]
|
|
446
|
+
column_totals_indexes: list[list[int]]
|
|
202
447
|
execution_response: BareExecutionResponse
|
|
203
448
|
primary_labels_from_index: dict[int, dict[str, str]]
|
|
204
449
|
primary_labels_from_columns: dict[int, dict[str, str]]
|
|
@@ -206,27 +451,36 @@ class DataFrameMetadata:
|
|
|
206
451
|
@classmethod
|
|
207
452
|
def from_data(
|
|
208
453
|
cls,
|
|
209
|
-
headers:
|
|
454
|
+
headers: _HeadersByAxis,
|
|
210
455
|
execution_response: BareExecutionResponse,
|
|
211
456
|
primary_labels_from_index: dict[int, dict[str, str]],
|
|
212
457
|
primary_labels_from_columns: dict[int, dict[str, str]],
|
|
213
458
|
) -> "DataFrameMetadata":
|
|
214
459
|
"""This method constructs a DataFrameMetadata object from data headers and an execution response.
|
|
215
460
|
|
|
216
|
-
Args: headers (
|
|
461
|
+
Args: headers (_HeadersByAxis):
|
|
217
462
|
A tuple containing data headers. execution_response (BareExecutionResponse): An ExecutionResponse object.
|
|
218
463
|
|
|
219
464
|
Returns: DataFrameMetadata: An initialized DataFrameMetadata object."""
|
|
220
|
-
row_totals_indexes = [
|
|
221
|
-
|
|
222
|
-
]
|
|
465
|
+
row_totals_indexes = cls._get_totals_indexes(headers[0])
|
|
466
|
+
column_totals_indexes = cls._get_totals_indexes(headers[1])
|
|
223
467
|
return cls(
|
|
224
468
|
row_totals_indexes=row_totals_indexes,
|
|
469
|
+
column_totals_indexes=column_totals_indexes,
|
|
225
470
|
execution_response=execution_response,
|
|
226
471
|
primary_labels_from_index=primary_labels_from_index,
|
|
227
472
|
primary_labels_from_columns=primary_labels_from_columns,
|
|
228
473
|
)
|
|
229
474
|
|
|
475
|
+
@staticmethod
|
|
476
|
+
def _get_totals_indexes(headers: Optional[Any]) -> list[list[int]]:
|
|
477
|
+
if headers is None:
|
|
478
|
+
return []
|
|
479
|
+
return [
|
|
480
|
+
[idx for idx, hdr in enumerate(dim) if hdr is not None and hdr.get("totalHeader") is not None]
|
|
481
|
+
for dim in headers
|
|
482
|
+
]
|
|
483
|
+
|
|
230
484
|
|
|
231
485
|
def _read_complete_execution_result(
|
|
232
486
|
execution_response: BareExecutionResponse,
|
|
@@ -234,6 +488,7 @@ def _read_complete_execution_result(
|
|
|
234
488
|
result_size_dimensions_limits: ResultSizeDimensions,
|
|
235
489
|
result_size_bytes_limit: Optional[int] = None,
|
|
236
490
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
491
|
+
optimized: bool = False,
|
|
237
492
|
) -> _DataWithHeaders:
|
|
238
493
|
"""
|
|
239
494
|
Extracts all data and headers for an execution result. This does page around the execution result to extract
|
|
@@ -245,6 +500,10 @@ def _read_complete_execution_result(
|
|
|
245
500
|
result_size_dimensions_limits (ResultSizeDimensions): Limits for result size dimensions.
|
|
246
501
|
result_size_bytes_limit (Optional[int], optional): Limit for result size in bytes. Defaults to None.
|
|
247
502
|
page_size (int, optional): Page size to use when reading data. Defaults to _DEFAULT_PAGE_SIZE.
|
|
503
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
504
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
505
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
506
|
+
which can significantly reduce memory usage.
|
|
248
507
|
|
|
249
508
|
Returns:
|
|
250
509
|
_DataWithHeaders: All the data and headers from the execution result.
|
|
@@ -252,10 +511,10 @@ def _read_complete_execution_result(
|
|
|
252
511
|
num_dims = len(execution_response.dimensions)
|
|
253
512
|
offset = [0] * num_dims
|
|
254
513
|
limit = [page_size] * num_dims
|
|
255
|
-
acc = _AccumulatedData()
|
|
256
514
|
|
|
257
|
-
|
|
515
|
+
acc = _OptimizedAccumulatedData() if optimized else _AccumulatedData()
|
|
258
516
|
|
|
517
|
+
result_size_limits_checked = False
|
|
259
518
|
while True:
|
|
260
519
|
# top-level loop pages through the first dimension;
|
|
261
520
|
#
|
|
@@ -303,7 +562,6 @@ def _read_complete_execution_result(
|
|
|
303
562
|
break
|
|
304
563
|
|
|
305
564
|
offset = [result.next_page_start(dim=0), 0] if num_dims > 1 else [result.next_page_start(dim=0)]
|
|
306
|
-
|
|
307
565
|
return acc.result()
|
|
308
566
|
|
|
309
567
|
|
|
@@ -339,14 +597,14 @@ def _create_header_mapper(
|
|
|
339
597
|
attribute_labels = label_overrides.get("labels", {})
|
|
340
598
|
measure_labels = label_overrides.get("metrics", {})
|
|
341
599
|
|
|
342
|
-
def _mapper(header:
|
|
600
|
+
def _mapper(header: Union[dict, _Header, None], header_idx: Optional[int]) -> Optional[str]:
|
|
343
601
|
label = None
|
|
344
602
|
if header is None:
|
|
345
603
|
pass
|
|
346
|
-
elif "attributeHeader"
|
|
347
|
-
if "labelValue" in
|
|
348
|
-
label_value =
|
|
349
|
-
primary_label_value =
|
|
604
|
+
elif attribute_header := header.get("attributeHeader"):
|
|
605
|
+
if "labelValue" in attribute_header:
|
|
606
|
+
label_value = attribute_header["labelValue"]
|
|
607
|
+
primary_label_value = attribute_header["primaryLabelValue"]
|
|
350
608
|
label = primary_label_value if use_primary_labels_in_attributes else label_value
|
|
351
609
|
if header_idx is not None:
|
|
352
610
|
if header_idx in primary_attribute_labels_mapping:
|
|
@@ -359,17 +617,18 @@ def _create_header_mapper(
|
|
|
359
617
|
# Excel formatter apply call failure
|
|
360
618
|
if label is None:
|
|
361
619
|
label = " "
|
|
362
|
-
elif "labelName" in
|
|
363
|
-
attr_local_id =
|
|
620
|
+
elif "labelName" in attribute_header:
|
|
621
|
+
attr_local_id = attribute_header["localIdentifier"]
|
|
364
622
|
if use_local_ids_in_headers:
|
|
365
623
|
label = attr_local_id
|
|
366
624
|
else:
|
|
367
625
|
if attr_local_id in attribute_labels:
|
|
368
626
|
label = attribute_labels[attr_local_id]["title"]
|
|
369
627
|
else:
|
|
370
|
-
label =
|
|
371
|
-
|
|
372
|
-
|
|
628
|
+
label = attribute_header["labelName"]
|
|
629
|
+
|
|
630
|
+
elif (measure_header := header.get("measureHeader")) and header_idx is not None:
|
|
631
|
+
measure_idx = measure_header["measureIndex"]
|
|
373
632
|
measure_descriptor = dim_descriptor["headers"][header_idx]["measureGroupHeaders"][measure_idx]
|
|
374
633
|
|
|
375
634
|
if use_local_ids_in_headers:
|
|
@@ -381,8 +640,9 @@ def _create_header_mapper(
|
|
|
381
640
|
label = measure_descriptor["name"]
|
|
382
641
|
else:
|
|
383
642
|
label = measure_descriptor["localIdentifier"]
|
|
384
|
-
|
|
385
|
-
|
|
643
|
+
|
|
644
|
+
elif total_header := header.get("totalHeader"):
|
|
645
|
+
label = total_header["function"]
|
|
386
646
|
return label
|
|
387
647
|
|
|
388
648
|
return _mapper
|
|
@@ -390,7 +650,7 @@ def _create_header_mapper(
|
|
|
390
650
|
|
|
391
651
|
def _headers_to_index(
|
|
392
652
|
dim_idx: int,
|
|
393
|
-
headers:
|
|
653
|
+
headers: _HeadersByAxis,
|
|
394
654
|
response: BareExecutionResponse,
|
|
395
655
|
label_overrides: LabelOverrides,
|
|
396
656
|
use_local_ids_in_headers: bool = False,
|
|
@@ -432,7 +692,7 @@ def _headers_to_index(
|
|
|
432
692
|
return pandas.MultiIndex.from_arrays(
|
|
433
693
|
[
|
|
434
694
|
tuple(mapper(header, header_idx) for header in header_group)
|
|
435
|
-
for header_idx, header_group in enumerate(cast(
|
|
695
|
+
for header_idx, header_group in enumerate(cast(list, headers[dim_idx]))
|
|
436
696
|
],
|
|
437
697
|
names=[mapper(dim_header, None) for dim_header in (response.dimensions[dim_idx]["headers"])],
|
|
438
698
|
), primary_attribute_labels_mapping
|
|
@@ -465,17 +725,17 @@ def _merge_grand_totals_into_data(extract: _DataWithHeaders) -> Union[_DataArray
|
|
|
465
725
|
return data
|
|
466
726
|
|
|
467
727
|
|
|
468
|
-
def _merge_grand_total_headers_into_headers(extract: _DataWithHeaders) ->
|
|
728
|
+
def _merge_grand_total_headers_into_headers(extract: _DataWithHeaders) -> _HeadersByAxis:
|
|
469
729
|
"""Merges grand total headers into data headers. This function will mutate the extracted data.
|
|
470
730
|
|
|
471
731
|
Args:
|
|
472
732
|
extract (_DataWithHeaders): The data along with its headers that need to be merged.
|
|
473
733
|
|
|
474
734
|
Returns:
|
|
475
|
-
|
|
735
|
+
_HeadersByAxis:
|
|
476
736
|
A tuple containing the modified data headers and the grand total headers if present.
|
|
477
737
|
"""
|
|
478
|
-
headers:
|
|
738
|
+
headers: _HeadersByAxis = extract.data_headers
|
|
479
739
|
|
|
480
740
|
for dim_idx, grand_total_headers in enumerate(extract.grand_total_headers):
|
|
481
741
|
if grand_total_headers is None:
|
|
@@ -496,6 +756,7 @@ def convert_execution_response_to_dataframe(
|
|
|
496
756
|
use_local_ids_in_headers: bool = False,
|
|
497
757
|
use_primary_labels_in_attributes: bool = False,
|
|
498
758
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
759
|
+
optimized: bool = False,
|
|
499
760
|
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
|
|
500
761
|
"""
|
|
501
762
|
Converts execution result to a pandas dataframe, maintaining the dimensionality of the result.
|
|
@@ -511,6 +772,10 @@ def convert_execution_response_to_dataframe(
|
|
|
511
772
|
use_primary_labels_in_attributes (bool, default=False): Use primary labels in attributes if True, else use
|
|
512
773
|
default settings.
|
|
513
774
|
page_size (int, default=_DEFAULT_PAGE_SIZE): Size of the page.
|
|
775
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
776
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
777
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
778
|
+
which can significantly reduce memory usage.
|
|
514
779
|
|
|
515
780
|
Returns:
|
|
516
781
|
Tuple[pandas.DataFrame, DataFrameMetadata]: A tuple containing the created dataframe and its metadata.
|
|
@@ -521,7 +786,9 @@ def convert_execution_response_to_dataframe(
|
|
|
521
786
|
result_size_dimensions_limits=result_size_dimensions_limits,
|
|
522
787
|
result_size_bytes_limit=result_size_bytes_limit,
|
|
523
788
|
page_size=page_size,
|
|
789
|
+
optimized=optimized,
|
|
524
790
|
)
|
|
791
|
+
|
|
525
792
|
full_data = _merge_grand_totals_into_data(extract)
|
|
526
793
|
full_headers = _merge_grand_total_headers_into_headers(extract)
|
|
527
794
|
|
|
@@ -1,42 +1,28 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gooddata-pandas
|
|
3
|
-
Version: 1.47.0
|
|
3
|
+
Version: 1.55.1.dev2
|
|
4
4
|
Summary: GoodData Cloud to pandas
|
|
5
|
-
|
|
6
|
-
Author-email: support@gooddata.com
|
|
7
|
-
License: MIT
|
|
8
|
-
Project-URL: Documentation, https://gooddata-pandas.readthedocs.io/en/v1.47.0
|
|
5
|
+
Project-URL: Documentation, https://gooddata-pandas.readthedocs.io/en/v1.55.1.dev2
|
|
9
6
|
Project-URL: Source, https://github.com/gooddata/gooddata-python-sdk
|
|
10
|
-
|
|
7
|
+
Author-email: GoodData <support@gooddata.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE.txt
|
|
10
|
+
Keywords: analytics,business,cloud,data,data_frame,frame,gooddata,headless,headless-bi,intelligence,layer,metrics,native,pandas,semantic,series,sql
|
|
11
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
12
12
|
Classifier: Environment :: Console
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
16
|
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
18
|
Classifier: Topic :: Database
|
|
20
19
|
Classifier: Topic :: Scientific/Engineering
|
|
21
20
|
Classifier: Topic :: Software Development
|
|
22
21
|
Classifier: Typing :: Typed
|
|
23
|
-
Requires-Python: >=3.9
|
|
24
|
-
|
|
25
|
-
License-File: LICENSE.txt
|
|
26
|
-
Requires-Dist: gooddata-sdk~=1.47.0
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: gooddata-sdk~=1.55.1.dev2
|
|
27
24
|
Requires-Dist: pandas<3.0.0,>=2.0.0
|
|
28
|
-
|
|
29
|
-
Dynamic: author-email
|
|
30
|
-
Dynamic: classifier
|
|
31
|
-
Dynamic: description
|
|
32
|
-
Dynamic: description-content-type
|
|
33
|
-
Dynamic: keywords
|
|
34
|
-
Dynamic: license
|
|
35
|
-
Dynamic: license-file
|
|
36
|
-
Dynamic: project-url
|
|
37
|
-
Dynamic: requires-dist
|
|
38
|
-
Dynamic: requires-python
|
|
39
|
-
Dynamic: summary
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
40
26
|
|
|
41
27
|
# GoodData Pandas
|
|
42
28
|
|
|
@@ -50,7 +36,7 @@ See [DOCUMENTATION](https://gooddata-pandas.readthedocs.io/en/latest/) for more
|
|
|
50
36
|
- GoodData.CN installation; either running on your cloud
|
|
51
37
|
infrastructure or the free Community Edition running on your workstation
|
|
52
38
|
|
|
53
|
-
- Python 3.9 or newer
|
|
39
|
+
- Python 3.10 or newer
|
|
54
40
|
|
|
55
41
|
## Installation
|
|
56
42
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
gooddata_pandas/__init__.py,sha256=Ta3qIIDq7kBRUsYSV3aC69AQBFvFvhtWDQucgP-l88w,297
|
|
2
|
+
gooddata_pandas/_version.py,sha256=960vTs6l7xsN2BOXWCxOc4PSKdzzKhnNEPTMnmMTCQs,119
|
|
3
|
+
gooddata_pandas/data_access.py,sha256=VPFjlOVH4dsQvbspEkT6UG_g3yA9sE5g8OLSrqKaeH4,20129
|
|
4
|
+
gooddata_pandas/dataframe.py,sha256=_riBCtkV7zJZ8YlvacPfpxs0gENMgV0W4nNii6Ei-2A,18074
|
|
5
|
+
gooddata_pandas/good_pandas.py,sha256=2GzISAD9J2CQy3KM8kuelPazOFfjA5g4v_p3TyINBW8,3474
|
|
6
|
+
gooddata_pandas/py.typed,sha256=u_MS29sadlaIqGRPYFjWml5u0gQnoQfvbsf9pu3TZJU,94
|
|
7
|
+
gooddata_pandas/result_convertor.py,sha256=Tv6Ee3JxxFbKoPmXz0R0fl7x7HnI0-5pHycFu-QsFus,34928
|
|
8
|
+
gooddata_pandas/series.py,sha256=ELBSg1jKy-AYrtXErpNhsmQ0Zd6mP1M6FNS6bGgNPyI,6780
|
|
9
|
+
gooddata_pandas/utils.py,sha256=JhWs0WYqg-9o3aWRP21ERFAxCKoT5oNKQ5mKlJh4uT4,8091
|
|
10
|
+
gooddata_pandas-1.55.1.dev2.dist-info/METADATA,sha256=fnYJMMShmWqrTrJ9zKsVctAz2rRNT4ayXvGtW0DLvXQ,2847
|
|
11
|
+
gooddata_pandas-1.55.1.dev2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
+
gooddata_pandas-1.55.1.dev2.dist-info/licenses/LICENSE.txt,sha256=3RjzQk8y9HG1_LgqvbEqWZKJnTQGOO1cpzYzBc13Myk,149825
|
|
13
|
+
gooddata_pandas-1.55.1.dev2.dist-info/RECORD,,
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
gooddata_pandas/__init__.py,sha256=Ta3qIIDq7kBRUsYSV3aC69AQBFvFvhtWDQucgP-l88w,297
|
|
2
|
-
gooddata_pandas/_version.py,sha256=960vTs6l7xsN2BOXWCxOc4PSKdzzKhnNEPTMnmMTCQs,119
|
|
3
|
-
gooddata_pandas/data_access.py,sha256=VPFjlOVH4dsQvbspEkT6UG_g3yA9sE5g8OLSrqKaeH4,20129
|
|
4
|
-
gooddata_pandas/dataframe.py,sha256=EsOgO8O42JBg1as0RZVwbeVOlGlENpkEsvlL-Xi5Jsg,16679
|
|
5
|
-
gooddata_pandas/good_pandas.py,sha256=2GzISAD9J2CQy3KM8kuelPazOFfjA5g4v_p3TyINBW8,3474
|
|
6
|
-
gooddata_pandas/py.typed,sha256=u_MS29sadlaIqGRPYFjWml5u0gQnoQfvbsf9pu3TZJU,94
|
|
7
|
-
gooddata_pandas/result_convertor.py,sha256=r7uFrjeM6cxMy08YcS3LywF1iUPSyEyG3BAddh0DkIQ,25807
|
|
8
|
-
gooddata_pandas/series.py,sha256=ELBSg1jKy-AYrtXErpNhsmQ0Zd6mP1M6FNS6bGgNPyI,6780
|
|
9
|
-
gooddata_pandas/utils.py,sha256=JhWs0WYqg-9o3aWRP21ERFAxCKoT5oNKQ5mKlJh4uT4,8091
|
|
10
|
-
gooddata_pandas-1.47.0.dist-info/licenses/LICENSE.txt,sha256=3RjzQk8y9HG1_LgqvbEqWZKJnTQGOO1cpzYzBc13Myk,149825
|
|
11
|
-
gooddata_pandas-1.47.0.dist-info/METADATA,sha256=TW89fiNcfnRNuTio6dwsR7JnZGFnpXpiCO-m8bVz5iI,3133
|
|
12
|
-
gooddata_pandas-1.47.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
gooddata_pandas-1.47.0.dist-info/top_level.txt,sha256=B7K_WFxlxplJbEbv5Mf0YhX74dbOpTPgDX-W6I7CssI,16
|
|
14
|
-
gooddata_pandas-1.47.0.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
gooddata_pandas
|
{gooddata_pandas-1.47.0.dist-info → gooddata_pandas-1.55.1.dev2.dist-info}/licenses/LICENSE.txt
RENAMED
|
File without changes
|