gooddata-pandas 1.47.0__py3-none-any.whl → 1.55.1.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/gooddata_pandas/dataframe.py
+++ b/gooddata_pandas/dataframe.py
@@ -238,6 +238,7 @@ class DataFrameFactory:
         created_visualizations_response: dict,
         on_execution_submitted: Optional[Callable[[Execution], None]] = None,
         is_cancellable: bool = False,
+        optimized: bool = False,
     ) -> tuple[pandas.DataFrame, DataFrameMetadata]:
         """
         Creates a data frame using a created visualization.
@@ -247,6 +248,10 @@ class DataFrameFactory:
             on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
                 submitted to the backend.
             is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
+            optimized (bool, default=False): Use the memory-optimized accumulator if True; by default, the
+                accumulator stores headers in memory as lists of dicts, which can consume a lot of memory for
+                large results. The optimized accumulator stores only unique values and keeps references to
+                them in the list, which can significantly reduce memory usage.

         Returns:
             pandas.DataFrame: A DataFrame instance.
@@ -257,6 +262,7 @@ class DataFrameFactory:
         return self.for_exec_def(
             exec_def=execution_definition,
             on_execution_submitted=on_execution_submitted,
+            optimized=optimized,
         )

     def result_cache_metadata_for_exec_result_id(self, result_id: str) -> ResultCacheMetadata:
@@ -279,6 +285,7 @@ class DataFrameFactory:
         result_size_bytes_limit: Optional[int] = None,
         page_size: int = _DEFAULT_PAGE_SIZE,
         on_execution_submitted: Optional[Callable[[Execution], None]] = None,
+        optimized: bool = False,
     ) -> tuple[pandas.DataFrame, DataFrameMetadata]:
         """
         Creates a data frame using an execution definition.
@@ -311,6 +318,10 @@ class DataFrameFactory:
             page_size (int): Number of records per page.
             on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
                 submitted to the backend.
+            optimized (bool, default=False): Use the memory-optimized accumulator if True; by default, the
+                accumulator stores headers in memory as lists of dicts, which can consume a lot of memory for
+                large results. The optimized accumulator stores only unique values and keeps references to
+                them in the list, which can significantly reduce memory usage.

         Returns:
             Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
@@ -331,6 +342,7 @@ class DataFrameFactory:
             result_size_dimensions_limits=result_size_dimensions_limits,
             result_size_bytes_limit=result_size_bytes_limit,
             page_size=page_size,
+            optimized=optimized,
         )

     def for_exec_result_id(
@@ -343,6 +355,7 @@ class DataFrameFactory:
         use_local_ids_in_headers: bool = False,
         use_primary_labels_in_attributes: bool = False,
         page_size: int = _DEFAULT_PAGE_SIZE,
+        optimized: bool = False,
     ) -> tuple[pandas.DataFrame, DataFrameMetadata]:
         """
         Retrieves a DataFrame and DataFrame metadata for a given execution result identifier.
@@ -373,6 +386,10 @@ class DataFrameFactory:
             use_local_ids_in_headers (bool): Use local identifier in headers.
             use_primary_labels_in_attributes (bool): Use primary labels in attributes.
             page_size (int): Number of records per page.
+            optimized (bool, default=False): Use the memory-optimized accumulator if True; by default, the
+                accumulator stores headers in memory as lists of dicts, which can consume a lot of memory for
+                large results. The optimized accumulator stores only unique values and keeps references to
+                them in the list, which can significantly reduce memory usage.

         Returns:
             Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
@@ -398,4 +415,5 @@ class DataFrameFactory:
             use_local_ids_in_headers=use_local_ids_in_headers,
             use_primary_labels_in_attributes=use_primary_labels_in_attributes,
             page_size=page_size,
+            optimized=optimized,
         )
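These hunks thread the new `optimized` flag through every public entry point of `DataFrameFactory`. A hedged usage sketch follows; host, token, workspace id and `exec_def` are placeholders, and while `GoodPandas` and `data_frames` are the package's documented entry points, verify exact signatures against your installed version:

```python
from gooddata_pandas import GoodPandas

gp = GoodPandas(host="https://example.gooddata.com", token="<api-token>")  # placeholder credentials
frames = gp.data_frames("demo_workspace_id")  # returns a DataFrameFactory

# exec_def would be a gooddata_sdk ExecutionDefinition built elsewhere;
# optimized=True selects the header-deduplicating accumulator introduced below.
df, metadata = frames.for_exec_def(exec_def=exec_def, optimized=True)
```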
--- a/gooddata_pandas/result_convertor.py
+++ b/gooddata_pandas/result_convertor.py
@@ -1,4 +1,7 @@
 # (C) 2022 GoodData Corporation
+from abc import ABC, abstractmethod
+from collections.abc import Iterator
+from functools import cached_property
 from typing import Any, Callable, Optional, Union, cast

 import pandas
@@ -11,6 +14,163 @@ _DataArray = list[Union[int, None]]
 LabelOverrides = dict[str, dict[str, dict[str, str]]]


+@define(frozen=True, slots=True)
+class _Header(ABC):
+    """
+    Abstract base class for headers. There are 4 types of headers:
+    - attribute header with attribute value and primary label value
+    - attribute header with label name and label identifier
+    - measure header
+    - total header
+
+    We convert dict representations to _Header objects with slots to improve memory usage.
+    """
+
+    @cached_property
+    @abstractmethod
+    def _dict(self) -> dict[str, Any]:
+        pass
+
+    def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
+        return self._dict.get(key, default)
+
+
+@define(frozen=True, slots=True)
+class _AttributeValuePrimary(_Header):
+    """
+    Attribute header with label value and primary label value.
+    """
+
+    label_value: str
+    primary_label_value: str
+
+    @cached_property
+    def _dict(self) -> dict[str, Any]:
+        return {"attributeHeader": {"labelValue": self.label_value, "primaryLabelValue": self.primary_label_value}}
+
+
+@define(frozen=True, slots=True)
+class _AttributeNameLocal(_Header):
+    """
+    Attribute header with label name and label identifier.
+    """
+
+    label_name: str
+    local_identifier: str
+
+    @cached_property
+    def _dict(self) -> dict[str, Any]:
+        return {"attributeHeader": {"labelName": self.label_name, "localIdentifier": self.local_identifier}}
+
+
+@define(frozen=True, slots=True)
+class _MeasureHeader(_Header):
+    """
+    Measure header.
+    """
+
+    measure_index: str
+
+    @cached_property
+    def _dict(self) -> dict[str, Any]:
+        return {"measureHeader": {"measureIndex": self.measure_index}}
+
+
+@define(frozen=True, slots=True)
+class _TotalHeader(_Header):
+    """
+    Total header.
+    """
+
+    function: str
+
+    @cached_property
+    def _dict(self) -> dict[str, Any]:
+        return {"totalHeader": {"function": self.function}}
+
+
+def _header_from_dict(d: dict[str, Any]) -> Optional[_Header]:
+    """
+    Convert a dict representation to a _Header object.
+    :param d: dictionary representation of a header
+    :return: _Header object, or None if the dictionary does not represent a header or is not supported.
+        However, we expect that all execution results contain correct data.
+    """
+    if attribute_header := d.get("attributeHeader"):
+        if "labelValue" in attribute_header:
+            return _AttributeValuePrimary(
+                label_value=attribute_header["labelValue"], primary_label_value=attribute_header["primaryLabelValue"]
+            )
+        if "labelName" in attribute_header:
+            return _AttributeNameLocal(
+                label_name=attribute_header["labelName"], local_identifier=attribute_header["localIdentifier"]
+            )
+        return None
+
+    if measure_header := d.get("measureHeader"):
+        return _MeasureHeader(measure_header["measureIndex"])
+
+    if total_header := d.get("totalHeader"):
+        return _TotalHeader(total_header["function"])
+
+    return None
+
+
+@define
+class _HeaderContainer:
+    """
+    Container for headers that improves memory usage.
+    Unique headers are stored as keys in _header_cache and references to them are stored in _headers.
+    This way we avoid storing the same header multiple times, reducing memory allocations,
+    which is important for large data tables with many attributes.
+    """
+
+    _headers: list[_Header] = field(factory=list)
+    _header_cache: dict[_Header, _Header] = field(factory=dict)
+
+    def append(self, header_dict: dict) -> None:
+        """
+        Add a header to the container.
+
+        First, try to convert the header dict to a _Header object and return early if that is not possible.
+        Then check whether an equal header is already cached: if it is, append a reference to the
+        cached instance; if it is not, cache the new header first and then append it.
+        """
+
+        header = _header_from_dict(header_dict)
+        if header is None:
+            return
+
+        if header not in self._header_cache:
+            self._header_cache[header] = header
+        self._headers.append(self._header_cache[header])
+
+    def extend(self, header_dicts: list[dict]) -> None:
+        """
+        Add multiple headers to the container.
+        """
+        for header_dict in header_dicts:
+            self.append(header_dict)
+
+    def __iter__(self) -> Iterator[_Header]:
+        yield from self._headers
+
+    def __len__(self) -> int:
+        return len(self._headers)
+
+    def __getitem__(self, index: int) -> _Header:
+        return self._headers[index]
+
+
+_DataHeaderContainers = list[_HeaderContainer]
+
+# The optimized version of _DataWithHeaders uses _HeaderContainer objects instead of lists of headers.
+_HeadersByAxis = tuple[
+    Union[_DataHeaders, _DataHeaderContainers], Union[Optional[_DataHeaders], Optional[_DataHeaderContainers]]
+]
+
+
 @frozen
 class _DataWithHeaders:
     """Extracted data; either array of values for one-dimensional result or array of arrays of values.
@@ -18,7 +178,7 @@ class _DataWithHeaders:
     Attributes:
         data (List[_DataArray]):
             Extracted data; either array of values for one-dimensional result or array of arrays of values.
-        data_headers (Tuple[_DataHeaders, Optional[_DataHeaders]]):
+        data_headers (_HeadersByAxis):
             Per-dimension headers for the data.
         grand_totals (Tuple[Optional[List[_DataArray]], Optional[List[_DataArray]]]):
             Per-dimension grand total data.
@@ -27,32 +187,34 @@
     """

     data: list[_DataArray]
-    data_headers: tuple[_DataHeaders, Optional[_DataHeaders]]
+    data_headers: _HeadersByAxis
     grand_totals: tuple[Optional[list[_DataArray]], Optional[list[_DataArray]]]
     grand_total_headers: tuple[Optional[list[dict[str, _DataHeaders]]], Optional[list[dict[str, _DataHeaders]]]]


 @define
-class _AccumulatedData:
+class _AbstractAccumulatedData(ABC):
     """
     Utility class to offload code from the function that extracts all data and headers for a
     particular paged result. The method drives the paging and calls out to this class to accumulate
     the essential data and headers from the page.
+    Note that if optimized is enabled, the data headers are stored in _HeaderContainer objects instead of
+    lists of headers. We do not store grand_totals_headers in _HeaderContainer, as we do not expect them to be large.

     Attributes:
         data (List[_DataArray]): Holds the accumulated data arrays from the pages.
-        data_headers (List[Optional[_DataHeaders]]): Holds the headers for data arrays.
+        data_headers (List[Optional[Any]]): Holds the headers for data arrays.
         grand_totals (List[Optional[List[_DataArray]]]): Holds the grand total data arrays.
         grand_totals_headers (List[Optional[_DataHeaders]]): Holds the headers for grand total data arrays.
     """

     data: list[_DataArray] = field(init=False, factory=list)
-    data_headers: list[Optional[_DataHeaders]] = field(init=False, factory=lambda: [None, None])
+    data_headers: list[Optional[Any]] = field(init=False, factory=lambda: [None, None])
     grand_totals: list[Optional[list[_DataArray]]] = field(init=False, factory=lambda: [None, None])
+    total_of_grant_totals_processed: bool = field(init=False, default=False)
     grand_totals_headers: list[Optional[list[dict[str, _DataHeaders]]]] = field(
         init=False, factory=lambda: [None, None]
     )
-    total_of_grant_totals_processed: bool = field(init=False, default=False)

     def accumulate_data(self, from_result: ExecutionResult) -> None:
         """
@@ -79,24 +241,6 @@ class _AccumulatedData:
         for i in range(len(from_result.data)):
             self.data[offset + i].extend(from_result.data[i])

-    def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
-        """
-        Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
-        matching the dimension index.
-
-        This will mutate the `data_headers`.
-
-        Args:
-            from_result (ExecutionResult): The result whose headers will be accumulated.
-            from_dim (int): The dimension index.
-        """
-
-        if self.data_headers[from_dim] is None:
-            self.data_headers[from_dim] = from_result.get_all_headers(dim=from_dim)
-        else:
-            for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
-                cast(_DataHeaders, self.data_headers[from_dim])[idx].extend(headers)
-
     def accumulate_grand_totals(
         self, from_result: ExecutionResult, paging_dim: int, response: BareExecutionResponse
     ) -> None:
@@ -161,6 +305,56 @@
             # have row totals and paging down, keep adding extra rows
             grand_totals_item.extend(grand_total["data"])

+    @abstractmethod
+    def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
+        """
+        Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
+        matching the dimension index.
+
+        This will mutate the `data_headers`.
+
+        Args:
+            from_result (ExecutionResult): The result whose headers will be accumulated.
+            from_dim (int): The dimension index.
+        """
+
+    @abstractmethod
+    def result(self) -> _DataWithHeaders:
+        """
+        Returns the data with headers.
+
+        Returns:
+            _DataWithHeaders: The data, data headers, grand totals and grand total headers.
+        """
+
+
+@define
+class _AccumulatedData(_AbstractAccumulatedData):
+    """
+    Implementation of _AbstractAccumulatedData that uses lists of dicts as storage;
+    it is used when non-optimized data extraction is requested.
+
+    This implementation may lead to uncontrolled memory usage for large results.
+    """
+
+    def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
+        """
+        Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
+        matching the dimension index.
+
+        This will mutate the `data_headers`.
+
+        Args:
+            from_result (ExecutionResult): The result whose headers will be accumulated.
+            from_dim (int): The dimension index.
+        """
+
+        if self.data_headers[from_dim] is None:
+            self.data_headers[from_dim] = from_result.get_all_headers(dim=from_dim)
+        else:
+            for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
+                cast(_DataHeaders, self.data_headers[from_dim])[idx].extend(headers)
+
     def result(self) -> _DataWithHeaders:
         """
         Returns the data with headers.
@@ -176,6 +370,55 @@
         )


+@define
+class _OptimizedAccumulatedData(_AbstractAccumulatedData):
+    """
+    Implementation of _AbstractAccumulatedData that stores headers in _HeaderContainer objects;
+    it is used when optimized data extraction is requested.
+
+    This implementation is more memory efficient than _AccumulatedData.
+    """
+
+    def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
+        """
+        Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
+        matching the dimension index.
+
+        This will mutate the `data_headers`.
+
+        Args:
+            from_result (ExecutionResult): The result whose headers will be accumulated.
+            from_dim (int): The dimension index.
+        """
+
+        if containers := self.data_headers[from_dim]:
+            for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
+                containers[idx].extend(headers)
+        else:
+            self.data_headers[from_dim] = []
+            containers = []
+            for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
+                hc = _HeaderContainer()
+                hc.extend(headers)
+                containers.append(hc)
+            self.data_headers[from_dim] = containers
+
+    def result(self) -> _DataWithHeaders:
+        """
+        Returns the data with headers.
+
+        Returns:
+            _DataWithHeaders: The data, data headers, grand totals and grand total headers.
+        """
+
+        return _DataWithHeaders(
+            data=self.data,
+            data_headers=(cast(_DataHeaderContainers, self.data_headers[0]), self.data_headers[1]),
+            grand_totals=(self.grand_totals[0], self.grand_totals[1]),
+            grand_total_headers=(self.grand_totals_headers[0], self.grand_totals_headers[1]),
+        )
+
+
 @define
 class DataFrameMetadata:
     """
@@ -194,11 +437,13 @@ class DataFrameMetadata:
         | AVG | 150
         SUM | | 450

+        column_totals_indexes: Similar to row_totals_indexes but for column headers.
         execution_response: An instance of BareExecutionResponse representing the
             execution response.
     """

     row_totals_indexes: list[list[int]]
+    column_totals_indexes: list[list[int]]
     execution_response: BareExecutionResponse
     primary_labels_from_index: dict[int, dict[str, str]]
     primary_labels_from_columns: dict[int, dict[str, str]]
@@ -206,27 +451,36 @@
     @classmethod
     def from_data(
         cls,
-        headers: tuple[_DataHeaders, Optional[_DataHeaders]],
+        headers: _HeadersByAxis,
         execution_response: BareExecutionResponse,
         primary_labels_from_index: dict[int, dict[str, str]],
         primary_labels_from_columns: dict[int, dict[str, str]],
     ) -> "DataFrameMetadata":
         """This method constructs a DataFrameMetadata object from data headers and an execution response.

-        Args: headers (Tuple[_DataHeaders, Optional[_DataHeaders]]):
+        Args: headers (_HeadersByAxis):
             A tuple containing data headers. execution_response (BareExecutionResponse): An ExecutionResponse object.

         Returns: DataFrameMetadata: An initialized DataFrameMetadata object."""
-        row_totals_indexes = [
-            [idx for idx, hdr in enumerate(dim) if hdr is not None and "totalHeader" in hdr] for dim in headers[0]
-        ]
+        row_totals_indexes = cls._get_totals_indexes(headers[0])
+        column_totals_indexes = cls._get_totals_indexes(headers[1])
         return cls(
             row_totals_indexes=row_totals_indexes,
+            column_totals_indexes=column_totals_indexes,
             execution_response=execution_response,
             primary_labels_from_index=primary_labels_from_index,
             primary_labels_from_columns=primary_labels_from_columns,
         )

+    @staticmethod
+    def _get_totals_indexes(headers: Optional[Any]) -> list[list[int]]:
+        if headers is None:
+            return []
+        return [
+            [idx for idx, hdr in enumerate(dim) if hdr is not None and hdr.get("totalHeader") is not None]
+            for dim in headers
+        ]
+

 def _read_complete_execution_result(
     execution_response: BareExecutionResponse,
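Note that `_get_totals_indexes` now calls `hdr.get("totalHeader")` rather than `"totalHeader" in hdr`, so it works with both plain dict headers and `_Header` instances, whose `get` delegates to the cached dict form. A standalone mimic of that duck typing (the `FakeTotalHeader` class is hypothetical):

```python
from typing import Any, Optional

class FakeTotalHeader:
    """Mimics _Header.get by delegating to a dict representation."""
    def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
        return {"totalHeader": {"function": "SUM"}}.get(key, default)

dim = [{"attributeHeader": {"labelValue": "East"}}, FakeTotalHeader()]
total_indexes = [idx for idx, hdr in enumerate(dim) if hdr.get("totalHeader") is not None]
assert total_indexes == [1]  # dict and object headers are matched the same way
```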
@@ -234,6 +488,7 @@ def _read_complete_execution_result(
     result_size_dimensions_limits: ResultSizeDimensions,
     result_size_bytes_limit: Optional[int] = None,
     page_size: int = _DEFAULT_PAGE_SIZE,
+    optimized: bool = False,
 ) -> _DataWithHeaders:
     """
     Extracts all data and headers for an execution result. This does page around the execution result to extract
@@ -245,6 +500,10 @@
         result_size_dimensions_limits (ResultSizeDimensions): Limits for result size dimensions.
         result_size_bytes_limit (Optional[int], optional): Limit for result size in bytes. Defaults to None.
         page_size (int, optional): Page size to use when reading data. Defaults to _DEFAULT_PAGE_SIZE.
+        optimized (bool, default=False): Use the memory-optimized accumulator if True; by default, the
+            accumulator stores headers in memory as lists of dicts, which can consume a lot of memory for
+            large results. The optimized accumulator stores only unique values and keeps references to
+            them in the list, which can significantly reduce memory usage.

     Returns:
         _DataWithHeaders: All the data and headers from the execution result.
@@ -252,10 +511,10 @@
     num_dims = len(execution_response.dimensions)
     offset = [0] * num_dims
     limit = [page_size] * num_dims
-    acc = _AccumulatedData()

-    result_size_limits_checked = False
+    acc = _OptimizedAccumulatedData() if optimized else _AccumulatedData()

+    result_size_limits_checked = False
     while True:
         # top-level loop pages through the first dimension;
         #
@@ -303,7 +562,6 @@
             break

         offset = [result.next_page_start(dim=0), 0] if num_dims > 1 else [result.next_page_start(dim=0)]
-
     return acc.result()


@@ -339,14 +597,14 @@ def _create_header_mapper(
     attribute_labels = label_overrides.get("labels", {})
     measure_labels = label_overrides.get("metrics", {})

-    def _mapper(header: Any, header_idx: Optional[int]) -> Optional[str]:
+    def _mapper(header: Union[dict, _Header, None], header_idx: Optional[int]) -> Optional[str]:
         label = None
         if header is None:
             pass
-        elif "attributeHeader" in header:
-            if "labelValue" in header["attributeHeader"]:
-                label_value = header["attributeHeader"]["labelValue"]
-                primary_label_value = header["attributeHeader"]["primaryLabelValue"]
+        elif attribute_header := header.get("attributeHeader"):
+            if "labelValue" in attribute_header:
+                label_value = attribute_header["labelValue"]
+                primary_label_value = attribute_header["primaryLabelValue"]
                 label = primary_label_value if use_primary_labels_in_attributes else label_value
                 if header_idx is not None:
                     if header_idx in primary_attribute_labels_mapping:
@@ -359,17 +617,18 @@
                     # Excel formatter apply call failure
                     if label is None:
                         label = " "
-            elif "labelName" in header["attributeHeader"]:
-                attr_local_id = header["attributeHeader"]["localIdentifier"]
+            elif "labelName" in attribute_header:
+                attr_local_id = attribute_header["localIdentifier"]
                 if use_local_ids_in_headers:
                     label = attr_local_id
                 else:
                     if attr_local_id in attribute_labels:
                         label = attribute_labels[attr_local_id]["title"]
                     else:
-                        label = header["attributeHeader"]["labelName"]
-        elif "measureHeader" in header and header_idx is not None:
-            measure_idx = header["measureHeader"]["measureIndex"]
+                        label = attribute_header["labelName"]
+
+        elif (measure_header := header.get("measureHeader")) and header_idx is not None:
+            measure_idx = measure_header["measureIndex"]
             measure_descriptor = dim_descriptor["headers"][header_idx]["measureGroupHeaders"][measure_idx]

             if use_local_ids_in_headers:
@@ -381,8 +640,9 @@
                 label = measure_descriptor["name"]
             else:
                 label = measure_descriptor["localIdentifier"]
-        elif "totalHeader" in header:
-            label = header["totalHeader"]["function"]
+
+        elif total_header := header.get("totalHeader"):
+            label = total_header["function"]
         return label

     return _mapper
@@ -390,7 +650,7 @@ def _create_header_mapper(

 def _headers_to_index(
     dim_idx: int,
-    headers: tuple[_DataHeaders, Optional[_DataHeaders]],
+    headers: _HeadersByAxis,
     response: BareExecutionResponse,
     label_overrides: LabelOverrides,
     use_local_ids_in_headers: bool = False,
@@ -432,7 +692,7 @@
     return pandas.MultiIndex.from_arrays(
         [
             tuple(mapper(header, header_idx) for header in header_group)
-            for header_idx, header_group in enumerate(cast(_DataHeaders, headers[dim_idx]))
+            for header_idx, header_group in enumerate(cast(list, headers[dim_idx]))
         ],
         names=[mapper(dim_header, None) for dim_header in (response.dimensions[dim_idx]["headers"])],
     ), primary_attribute_labels_mapping
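For context on the pandas call used here: `MultiIndex.from_arrays` builds one index level per array, which is how the per-dimension header groups become DataFrame axes. A tiny standalone illustration (the sample labels are invented):

```python
import pandas

# Each inner list is one header group; each becomes one level of the index.
header_groups = [["East", "East", "West"], ["Q1", "Q2", "Q1"]]
index = pandas.MultiIndex.from_arrays(header_groups, names=["region", "quarter"])
print(index)
# MultiIndex([('East', 'Q1'), ('East', 'Q2'), ('West', 'Q1')],
#            names=['region', 'quarter'])
```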
@@ -465,17 +725,17 @@ def _merge_grand_totals_into_data(extract: _DataWithHeaders) -> Union[_DataArray
     return data


-def _merge_grand_total_headers_into_headers(extract: _DataWithHeaders) -> tuple[_DataHeaders, Optional[_DataHeaders]]:
+def _merge_grand_total_headers_into_headers(extract: _DataWithHeaders) -> _HeadersByAxis:
     """Merges grand total headers into data headers. This function will mutate the extracted data.

     Args:
         extract (_DataWithHeaders): The data along with its headers that need to be merged.

     Returns:
-        Tuple[_DataHeaders, Optional[_DataHeaders]]:
+        _HeadersByAxis:
             A tuple containing the modified data headers and the grand total headers if present.
     """
-    headers: tuple[_DataHeaders, Optional[_DataHeaders]] = extract.data_headers
+    headers: _HeadersByAxis = extract.data_headers

     for dim_idx, grand_total_headers in enumerate(extract.grand_total_headers):
         if grand_total_headers is None:
@@ -496,6 +756,7 @@ def convert_execution_response_to_dataframe(
     use_local_ids_in_headers: bool = False,
     use_primary_labels_in_attributes: bool = False,
     page_size: int = _DEFAULT_PAGE_SIZE,
+    optimized: bool = False,
 ) -> tuple[pandas.DataFrame, DataFrameMetadata]:
     """
     Converts execution result to a pandas dataframe, maintaining the dimensionality of the result.
@@ -511,6 +772,10 @@
         use_primary_labels_in_attributes (bool, default=False): Use primary labels in attributes if True, else use
             default settings.
         page_size (int, default=_DEFAULT_PAGE_SIZE): Size of the page.
+        optimized (bool, default=False): Use the memory-optimized accumulator if True; by default, the
+            accumulator stores headers in memory as lists of dicts, which can consume a lot of memory for
+            large results. The optimized accumulator stores only unique values and keeps references to
+            them in the list, which can significantly reduce memory usage.

     Returns:
         Tuple[pandas.DataFrame, DataFrameMetadata]: A tuple containing the created dataframe and its metadata.
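The memory claim in this docstring can be checked independently of the package. A hedged micro-benchmark of the underlying idea, deduplicated versus per-row header objects (the sizes and the 50-unique-values ratio are arbitrary assumptions):

```python
import tracemalloc

def build(dedup: bool, n: int = 100_000) -> list:
    cache: dict = {}
    out: list = []
    for i in range(n):
        header = ("attributeHeader", f"value-{i % 50}")  # only 50 unique headers
        out.append(cache.setdefault(header, header) if dedup else header)
    return out

for dedup in (False, True):
    tracemalloc.start()
    data = build(dedup)
    _, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    print(f"dedup={dedup}: peak ~ {peak // 1024} KiB")
```

On CPython this typically shows a large gap, because the non-deduplicated list keeps 100,000 distinct tuples alive while the deduplicated one keeps 50 plus a list of references.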
@@ -521,7 +786,9 @@
         result_size_dimensions_limits=result_size_dimensions_limits,
         result_size_bytes_limit=result_size_bytes_limit,
         page_size=page_size,
+        optimized=optimized,
     )
+
     full_data = _merge_grand_totals_into_data(extract)
     full_headers = _merge_grand_total_headers_into_headers(extract)

--- a/gooddata_pandas-1.47.0.dist-info/METADATA
+++ b/gooddata_pandas-1.55.1.dev2.dist-info/METADATA
@@ -1,42 +1,28 @@
 Metadata-Version: 2.4
 Name: gooddata-pandas
-Version: 1.47.0
+Version: 1.55.1.dev2
 Summary: GoodData Cloud to pandas
-Author: GoodData
-Author-email: support@gooddata.com
-License: MIT
-Project-URL: Documentation, https://gooddata-pandas.readthedocs.io/en/v1.47.0
+Project-URL: Documentation, https://gooddata-pandas.readthedocs.io/en/v1.55.1.dev2
 Project-URL: Source, https://github.com/gooddata/gooddata-python-sdk
-Keywords: gooddata,pandas,series,data,frame,data_frame,analytics,headless,business,intelligence,headless-bi,cloud,native,semantic,layer,sql,metrics
+Author-email: GoodData <support@gooddata.com>
+License-Expression: MIT
+License-File: LICENSE.txt
+Keywords: analytics,business,cloud,data,data_frame,frame,gooddata,headless,headless-bi,intelligence,layer,metrics,native,pandas,semantic,series,sql
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Database
 Classifier: Topic :: Scientific/Engineering
 Classifier: Topic :: Software Development
 Classifier: Typing :: Typed
-Requires-Python: >=3.9.0
-Description-Content-Type: text/markdown
-License-File: LICENSE.txt
-Requires-Dist: gooddata-sdk~=1.47.0
+Requires-Python: >=3.10
+Requires-Dist: gooddata-sdk~=1.55.1.dev2
 Requires-Dist: pandas<3.0.0,>=2.0.0
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: keywords
-Dynamic: license
-Dynamic: license-file
-Dynamic: project-url
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
+Description-Content-Type: text/markdown

 # GoodData Pandas

@@ -50,7 +36,7 @@ See [DOCUMENTATION](https://gooddata-pandas.readthedocs.io/en/latest/) for more
 - GoodData.CN installation; either running on your cloud
   infrastructure or the free Community Edition running on your workstation

-- Python 3.9 or newer
+- Python 3.10 or newer

 ## Installation

--- /dev/null
+++ b/gooddata_pandas-1.55.1.dev2.dist-info/RECORD
@@ -0,0 +1,13 @@
+gooddata_pandas/__init__.py,sha256=Ta3qIIDq7kBRUsYSV3aC69AQBFvFvhtWDQucgP-l88w,297
+gooddata_pandas/_version.py,sha256=960vTs6l7xsN2BOXWCxOc4PSKdzzKhnNEPTMnmMTCQs,119
+gooddata_pandas/data_access.py,sha256=VPFjlOVH4dsQvbspEkT6UG_g3yA9sE5g8OLSrqKaeH4,20129
+gooddata_pandas/dataframe.py,sha256=_riBCtkV7zJZ8YlvacPfpxs0gENMgV0W4nNii6Ei-2A,18074
+gooddata_pandas/good_pandas.py,sha256=2GzISAD9J2CQy3KM8kuelPazOFfjA5g4v_p3TyINBW8,3474
+gooddata_pandas/py.typed,sha256=u_MS29sadlaIqGRPYFjWml5u0gQnoQfvbsf9pu3TZJU,94
+gooddata_pandas/result_convertor.py,sha256=Tv6Ee3JxxFbKoPmXz0R0fl7x7HnI0-5pHycFu-QsFus,34928
+gooddata_pandas/series.py,sha256=ELBSg1jKy-AYrtXErpNhsmQ0Zd6mP1M6FNS6bGgNPyI,6780
+gooddata_pandas/utils.py,sha256=JhWs0WYqg-9o3aWRP21ERFAxCKoT5oNKQ5mKlJh4uT4,8091
+gooddata_pandas-1.55.1.dev2.dist-info/METADATA,sha256=fnYJMMShmWqrTrJ9zKsVctAz2rRNT4ayXvGtW0DLvXQ,2847
+gooddata_pandas-1.55.1.dev2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+gooddata_pandas-1.55.1.dev2.dist-info/licenses/LICENSE.txt,sha256=3RjzQk8y9HG1_LgqvbEqWZKJnTQGOO1cpzYzBc13Myk,149825
+gooddata_pandas-1.55.1.dev2.dist-info/RECORD,,
--- a/gooddata_pandas-1.47.0.dist-info/WHEEL
+++ b/gooddata_pandas-1.55.1.dev2.dist-info/WHEEL
@@ -1,5 +1,4 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: hatchling 1.27.0
 Root-Is-Purelib: true
 Tag: py3-none-any
-
--- a/gooddata_pandas-1.47.0.dist-info/RECORD
+++ /dev/null
@@ -1,14 +0,0 @@
-gooddata_pandas/__init__.py,sha256=Ta3qIIDq7kBRUsYSV3aC69AQBFvFvhtWDQucgP-l88w,297
-gooddata_pandas/_version.py,sha256=960vTs6l7xsN2BOXWCxOc4PSKdzzKhnNEPTMnmMTCQs,119
-gooddata_pandas/data_access.py,sha256=VPFjlOVH4dsQvbspEkT6UG_g3yA9sE5g8OLSrqKaeH4,20129
-gooddata_pandas/dataframe.py,sha256=EsOgO8O42JBg1as0RZVwbeVOlGlENpkEsvlL-Xi5Jsg,16679
-gooddata_pandas/good_pandas.py,sha256=2GzISAD9J2CQy3KM8kuelPazOFfjA5g4v_p3TyINBW8,3474
-gooddata_pandas/py.typed,sha256=u_MS29sadlaIqGRPYFjWml5u0gQnoQfvbsf9pu3TZJU,94
-gooddata_pandas/result_convertor.py,sha256=r7uFrjeM6cxMy08YcS3LywF1iUPSyEyG3BAddh0DkIQ,25807
-gooddata_pandas/series.py,sha256=ELBSg1jKy-AYrtXErpNhsmQ0Zd6mP1M6FNS6bGgNPyI,6780
-gooddata_pandas/utils.py,sha256=JhWs0WYqg-9o3aWRP21ERFAxCKoT5oNKQ5mKlJh4uT4,8091
-gooddata_pandas-1.47.0.dist-info/licenses/LICENSE.txt,sha256=3RjzQk8y9HG1_LgqvbEqWZKJnTQGOO1cpzYzBc13Myk,149825
-gooddata_pandas-1.47.0.dist-info/METADATA,sha256=TW89fiNcfnRNuTio6dwsR7JnZGFnpXpiCO-m8bVz5iI,3133
-gooddata_pandas-1.47.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-gooddata_pandas-1.47.0.dist-info/top_level.txt,sha256=B7K_WFxlxplJbEbv5Mf0YhX74dbOpTPgDX-W6I7CssI,16
-gooddata_pandas-1.47.0.dist-info/RECORD,,
--- a/gooddata_pandas-1.47.0.dist-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-gooddata_pandas