gooddata-pandas 1.47.0__py3-none-any.whl → 1.55.1.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gooddata_pandas/dataframe.py +18 -0
- gooddata_pandas/result_convertor.py +316 -49
- {gooddata_pandas-1.47.0.dist-info → gooddata_pandas-1.55.1.dev2.dist-info}/METADATA +11 -25
- gooddata_pandas-1.55.1.dev2.dist-info/RECORD +13 -0
- {gooddata_pandas-1.47.0.dist-info → gooddata_pandas-1.55.1.dev2.dist-info}/WHEEL +1 -2
- gooddata_pandas-1.47.0.dist-info/RECORD +0 -14
- gooddata_pandas-1.47.0.dist-info/top_level.txt +0 -1
- {gooddata_pandas-1.47.0.dist-info → gooddata_pandas-1.55.1.dev2.dist-info}/licenses/LICENSE.txt +0 -0
gooddata_pandas/dataframe.py
CHANGED
|
@@ -238,6 +238,7 @@ class DataFrameFactory:
|
|
|
238
238
|
created_visualizations_response: dict,
|
|
239
239
|
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
|
|
240
240
|
is_cancellable: bool = False,
|
|
241
|
+
optimized: bool = False,
|
|
241
242
|
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
|
|
242
243
|
"""
|
|
243
244
|
Creates a data frame using a created visualization.
|
|
@@ -247,6 +248,10 @@ class DataFrameFactory:
|
|
|
247
248
|
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
|
|
248
249
|
submitted to the backend.
|
|
249
250
|
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
|
|
251
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
252
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
253
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
254
|
+
which can significantly reduce memory usage.
|
|
250
255
|
|
|
251
256
|
Returns:
|
|
252
257
|
pandas.DataFrame: A DataFrame instance.
|
|
@@ -257,6 +262,7 @@ class DataFrameFactory:
|
|
|
257
262
|
return self.for_exec_def(
|
|
258
263
|
exec_def=execution_definition,
|
|
259
264
|
on_execution_submitted=on_execution_submitted,
|
|
265
|
+
optimized=optimized,
|
|
260
266
|
)
|
|
261
267
|
|
|
262
268
|
def result_cache_metadata_for_exec_result_id(self, result_id: str) -> ResultCacheMetadata:
|
|
@@ -279,6 +285,7 @@ class DataFrameFactory:
|
|
|
279
285
|
result_size_bytes_limit: Optional[int] = None,
|
|
280
286
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
281
287
|
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
|
|
288
|
+
optimized: bool = False,
|
|
282
289
|
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
|
|
283
290
|
"""
|
|
284
291
|
Creates a data frame using an execution definition.
|
|
@@ -311,6 +318,10 @@ class DataFrameFactory:
|
|
|
311
318
|
page_size (int): Number of records per page.
|
|
312
319
|
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
|
|
313
320
|
submitted to the backend.
|
|
321
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
322
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
323
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
324
|
+
which can significantly reduce memory usage.
|
|
314
325
|
|
|
315
326
|
Returns:
|
|
316
327
|
Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
|
|
@@ -331,6 +342,7 @@ class DataFrameFactory:
|
|
|
331
342
|
result_size_dimensions_limits=result_size_dimensions_limits,
|
|
332
343
|
result_size_bytes_limit=result_size_bytes_limit,
|
|
333
344
|
page_size=page_size,
|
|
345
|
+
optimized=optimized,
|
|
334
346
|
)
|
|
335
347
|
|
|
336
348
|
def for_exec_result_id(
|
|
@@ -343,6 +355,7 @@ class DataFrameFactory:
|
|
|
343
355
|
use_local_ids_in_headers: bool = False,
|
|
344
356
|
use_primary_labels_in_attributes: bool = False,
|
|
345
357
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
358
|
+
optimized: bool = False,
|
|
346
359
|
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
|
|
347
360
|
"""
|
|
348
361
|
Retrieves a DataFrame and DataFrame metadata for a given execution result identifier.
|
|
@@ -373,6 +386,10 @@ class DataFrameFactory:
|
|
|
373
386
|
use_local_ids_in_headers (bool): Use local identifier in headers.
|
|
374
387
|
use_primary_labels_in_attributes (bool): Use primary labels in attributes.
|
|
375
388
|
page_size (int): Number of records per page.
|
|
389
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
390
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
391
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
392
|
+
which can significantly reduce memory usage.
|
|
376
393
|
|
|
377
394
|
Returns:
|
|
378
395
|
Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
|
|
@@ -398,4 +415,5 @@ class DataFrameFactory:
|
|
|
398
415
|
use_local_ids_in_headers=use_local_ids_in_headers,
|
|
399
416
|
use_primary_labels_in_attributes=use_primary_labels_in_attributes,
|
|
400
417
|
page_size=page_size,
|
|
418
|
+
optimized=optimized,
|
|
401
419
|
)
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
# (C) 2022 GoodData Corporation
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from collections.abc import Iterator
|
|
4
|
+
from functools import cached_property
|
|
2
5
|
from typing import Any, Callable, Optional, Union, cast
|
|
3
6
|
|
|
4
7
|
import pandas
|
|
@@ -11,6 +14,163 @@ _DataArray = list[Union[int, None]]
|
|
|
11
14
|
LabelOverrides = dict[str, dict[str, dict[str, str]]]
|
|
12
15
|
|
|
13
16
|
|
|
17
|
+
@define(frozen=True, slots=True)
|
|
18
|
+
class _Header(ABC):
|
|
19
|
+
"""
|
|
20
|
+
Abstract base class for headers. There are 4 types of headers:
|
|
21
|
+
- attribute header with attribute value and primary label value
|
|
22
|
+
- attribute header with label name and label identifier
|
|
23
|
+
- measure header
|
|
24
|
+
- total header
|
|
25
|
+
|
|
26
|
+
We convert dict representation to _Header objects with slots to improve memory usage.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
@cached_property
|
|
30
|
+
@abstractmethod
|
|
31
|
+
def _dict(self) -> dict[str, Any]:
|
|
32
|
+
pass
|
|
33
|
+
|
|
34
|
+
def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
|
|
35
|
+
return self._dict.get(key, default)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@define(frozen=True, slots=True)
|
|
39
|
+
class _AttributeValuePrimary(_Header):
|
|
40
|
+
"""
|
|
41
|
+
Attribute header with label value and primary label value.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
label_value: str
|
|
45
|
+
primary_label_value: str
|
|
46
|
+
|
|
47
|
+
@cached_property
|
|
48
|
+
def _dict(self) -> dict[str, Any]:
|
|
49
|
+
return {"attributeHeader": {"labelValue": self.label_value, "primaryLabelValue": self.primary_label_value}}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@define(frozen=True, slots=True)
|
|
53
|
+
class _AttributeNameLocal(_Header):
|
|
54
|
+
"""
|
|
55
|
+
Attribute header with label name and label identifier.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
label_name: str
|
|
59
|
+
local_identifier: str
|
|
60
|
+
|
|
61
|
+
@cached_property
|
|
62
|
+
def _dict(self) -> dict[str, Any]:
|
|
63
|
+
return {"attributeHeader": {"labelName": self.label_name, "localIdentifier": self.local_identifier}}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@define(frozen=True, slots=True)
|
|
67
|
+
class _MeasureHeader(_Header):
|
|
68
|
+
"""
|
|
69
|
+
Measure header.
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
measure_index: str
|
|
73
|
+
|
|
74
|
+
@cached_property
|
|
75
|
+
def _dict(self) -> dict[str, Any]:
|
|
76
|
+
return {"measureHeader": {"measureIndex": self.measure_index}}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@define(frozen=True, slots=True)
|
|
80
|
+
class _TotalHeader(_Header):
|
|
81
|
+
"""
|
|
82
|
+
Total header.
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
function: str
|
|
86
|
+
|
|
87
|
+
@cached_property
|
|
88
|
+
def _dict(self) -> dict[str, Any]:
|
|
89
|
+
return {"totalHeader": {"function": self.function}}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _header_from_dict(d: dict[str, Any]) -> Optional[_Header]:
|
|
93
|
+
"""
|
|
94
|
+
Convert dict representation to _Header object.
|
|
95
|
+
:param d: dictionary representation of a header
|
|
96
|
+
:return: _Header object or None if the dictionary does not represent a header or if it's not supported.
|
|
97
|
+
However, we expect that all execution results contain correct data.
|
|
98
|
+
"""
|
|
99
|
+
if attribute_header := d.get("attributeHeader"):
|
|
100
|
+
if "labelValue" in attribute_header:
|
|
101
|
+
return _AttributeValuePrimary(
|
|
102
|
+
label_value=attribute_header["labelValue"], primary_label_value=attribute_header["primaryLabelValue"]
|
|
103
|
+
)
|
|
104
|
+
if "labelName" in attribute_header:
|
|
105
|
+
return _AttributeNameLocal(
|
|
106
|
+
label_name=attribute_header["labelName"], local_identifier=attribute_header["localIdentifier"]
|
|
107
|
+
)
|
|
108
|
+
return None
|
|
109
|
+
|
|
110
|
+
if measure_header := d.get("measureHeader"):
|
|
111
|
+
return _MeasureHeader(measure_header["measureIndex"])
|
|
112
|
+
|
|
113
|
+
if total_header := d.get("totalHeader"):
|
|
114
|
+
return _TotalHeader(total_header["function"])
|
|
115
|
+
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@define
|
|
120
|
+
class _HeaderContainer:
|
|
121
|
+
"""
|
|
122
|
+
Container for headers to improve memory usage.
|
|
123
|
+
Unique headers are stored as keys in _header_cache and references to them are stored in _headers.
|
|
124
|
+
This way we avoid storing the same header multiple times, reducing memory allocations,
|
|
125
|
+
which is important for large datatables with many attributes.
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
_headers: list[_Header] = field(factory=list)
|
|
129
|
+
_header_cache: dict[_Header, _Header] = field(factory=dict)
|
|
130
|
+
|
|
131
|
+
def append(self, header_dict: dict) -> None:
|
|
132
|
+
"""
|
|
133
|
+
Add header to the container.
|
|
134
|
+
|
|
135
|
+
First, try to convert header dict to _Header object, and return early if it's not possible.
|
|
136
|
+
Then, check if the header is already in the container.
|
|
137
|
+
If it is, get a pointer to the existing header and add it to the container.
|
|
138
|
+
If it is not, add it to the container.
|
|
139
|
+
"""
|
|
140
|
+
|
|
141
|
+
header = _header_from_dict(header_dict)
|
|
142
|
+
if header is None:
|
|
143
|
+
return
|
|
144
|
+
|
|
145
|
+
if header not in self._header_cache:
|
|
146
|
+
self._header_cache[header] = header
|
|
147
|
+
self._headers.append(self._header_cache[header])
|
|
148
|
+
|
|
149
|
+
def extend(self, header_dicts: list[dict]) -> None:
|
|
150
|
+
"""
|
|
151
|
+
Add multiple headers to the container.
|
|
152
|
+
"""
|
|
153
|
+
for header_dict in header_dicts:
|
|
154
|
+
self.append(header_dict)
|
|
155
|
+
|
|
156
|
+
def __iter__(self) -> Iterator[_Header]:
|
|
157
|
+
yield from self._headers
|
|
158
|
+
|
|
159
|
+
def __len__(self) -> int:
|
|
160
|
+
return len(self._headers)
|
|
161
|
+
|
|
162
|
+
def __getitem__(self, index: int) -> _Header:
|
|
163
|
+
return self._headers[index]
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
_DataHeaderContainers = list[_HeaderContainer]
|
|
167
|
+
|
|
168
|
+
# Optimized version of _DataWithHeaders uses _HeaderContainer instead of list of headers
|
|
169
|
+
_HeadersByAxis = tuple[
|
|
170
|
+
Union[_DataHeaders, _DataHeaderContainers], Union[Optional[_DataHeaders], Optional[_DataHeaderContainers]]
|
|
171
|
+
]
|
|
172
|
+
|
|
173
|
+
|
|
14
174
|
@frozen
|
|
15
175
|
class _DataWithHeaders:
|
|
16
176
|
"""Extracted data; either array of values for one-dimensional result or array of arrays of values.
|
|
@@ -18,7 +178,7 @@ class _DataWithHeaders:
|
|
|
18
178
|
Attributes:
|
|
19
179
|
data (List[_DataArray]):
|
|
20
180
|
Extracted data; either array of values for one-dimensional result or array of arrays of values.
|
|
21
|
-
data_headers (
|
|
181
|
+
data_headers (_HeadersByAxis):
|
|
22
182
|
Per-dimension headers for the data.
|
|
23
183
|
grand_totals (Tuple[Optional[List[_DataArray]], Optional[List[_DataArray]]]):
|
|
24
184
|
Per-dimension grand total data.
|
|
@@ -27,32 +187,34 @@ class _DataWithHeaders:
|
|
|
27
187
|
"""
|
|
28
188
|
|
|
29
189
|
data: list[_DataArray]
|
|
30
|
-
data_headers:
|
|
190
|
+
data_headers: _HeadersByAxis
|
|
31
191
|
grand_totals: tuple[Optional[list[_DataArray]], Optional[list[_DataArray]]]
|
|
32
192
|
grand_total_headers: tuple[Optional[list[dict[str, _DataHeaders]]], Optional[list[dict[str, _DataHeaders]]]]
|
|
33
193
|
|
|
34
194
|
|
|
35
195
|
@define
|
|
36
|
-
class
|
|
196
|
+
class _AbstractAccumulatedData(ABC):
|
|
37
197
|
"""
|
|
38
198
|
Utility class to offload code from the function that extracts all data and headers for a
|
|
39
199
|
particular paged result. The method drives the paging and calls out to this class to accumulate
|
|
40
200
|
the essential data and headers from the page.
|
|
201
|
+
Note that if optimized is enabled, the data_headers are stored in _HeaderContainer instead of list of headers.
|
|
202
|
+
We do not store grand_totals_headers in _HeaderContainer, as we do not except
|
|
41
203
|
|
|
42
204
|
Attributes:
|
|
43
205
|
data (List[_DataArray]): Holds the accumulated data arrays from the pages.
|
|
44
|
-
data_headers (List[Optional[
|
|
206
|
+
data_headers (List[Optional[Any]]): Holds the headers for data arrays.
|
|
45
207
|
grand_totals (List[Optional[List[_DataArray]]]): Holds the grand total data arrays.
|
|
46
208
|
grand_totals_headers (List[Optional[_DataHeaders]]): Holds the headers for grand total data arrays.
|
|
47
209
|
"""
|
|
48
210
|
|
|
49
211
|
data: list[_DataArray] = field(init=False, factory=list)
|
|
50
|
-
data_headers: list[Optional[
|
|
212
|
+
data_headers: list[Optional[Any]] = field(init=False, factory=lambda: [None, None])
|
|
51
213
|
grand_totals: list[Optional[list[_DataArray]]] = field(init=False, factory=lambda: [None, None])
|
|
214
|
+
total_of_grant_totals_processed: bool = field(init=False, default=False)
|
|
52
215
|
grand_totals_headers: list[Optional[list[dict[str, _DataHeaders]]]] = field(
|
|
53
216
|
init=False, factory=lambda: [None, None]
|
|
54
217
|
)
|
|
55
|
-
total_of_grant_totals_processed: bool = field(init=False, default=False)
|
|
56
218
|
|
|
57
219
|
def accumulate_data(self, from_result: ExecutionResult) -> None:
|
|
58
220
|
"""
|
|
@@ -79,24 +241,6 @@ class _AccumulatedData:
|
|
|
79
241
|
for i in range(len(from_result.data)):
|
|
80
242
|
self.data[offset + i].extend(from_result.data[i])
|
|
81
243
|
|
|
82
|
-
def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
|
|
83
|
-
"""
|
|
84
|
-
Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
|
|
85
|
-
matching the dimension index.
|
|
86
|
-
|
|
87
|
-
This will mutate the `data_headers`.
|
|
88
|
-
|
|
89
|
-
Args:
|
|
90
|
-
from_result (ExecutionResult): The result whose headers will be accumulated.
|
|
91
|
-
from_dim (int): The dimension index.
|
|
92
|
-
"""
|
|
93
|
-
|
|
94
|
-
if self.data_headers[from_dim] is None:
|
|
95
|
-
self.data_headers[from_dim] = from_result.get_all_headers(dim=from_dim)
|
|
96
|
-
else:
|
|
97
|
-
for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
|
|
98
|
-
cast(_DataHeaders, self.data_headers[from_dim])[idx].extend(headers)
|
|
99
|
-
|
|
100
244
|
def accumulate_grand_totals(
|
|
101
245
|
self, from_result: ExecutionResult, paging_dim: int, response: BareExecutionResponse
|
|
102
246
|
) -> None:
|
|
@@ -161,6 +305,56 @@ class _AccumulatedData:
|
|
|
161
305
|
# have row totals and paging down, keep adding extra rows
|
|
162
306
|
grand_totals_item.extend(grand_total["data"])
|
|
163
307
|
|
|
308
|
+
@abstractmethod
|
|
309
|
+
def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
|
|
310
|
+
"""
|
|
311
|
+
Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
|
|
312
|
+
matching the dimension index.
|
|
313
|
+
|
|
314
|
+
This will mutate the `data_headers`.
|
|
315
|
+
|
|
316
|
+
Args:
|
|
317
|
+
from_result (ExecutionResult): The result whose headers will be accumulated.
|
|
318
|
+
from_dim (int): The dimension index.
|
|
319
|
+
"""
|
|
320
|
+
|
|
321
|
+
@abstractmethod
|
|
322
|
+
def result(self) -> _DataWithHeaders:
|
|
323
|
+
"""
|
|
324
|
+
Returns the data with headers.
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
_DataWithHeaders: The data, data headers, grand totals and grand total headers.
|
|
328
|
+
"""
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
@define
|
|
332
|
+
class _AccumulatedData(_AbstractAccumulatedData):
|
|
333
|
+
"""
|
|
334
|
+
Implementation of _AbstractAccumulatedData that uses list of dicts as storage,
|
|
335
|
+
which is used when non-optimized data extraction is used.
|
|
336
|
+
|
|
337
|
+
This implementation may lead to uncontrolled memory usage for large results.
|
|
338
|
+
"""
|
|
339
|
+
|
|
340
|
+
def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
|
|
341
|
+
"""
|
|
342
|
+
Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
|
|
343
|
+
matching the dimension index.
|
|
344
|
+
|
|
345
|
+
This will mutate the `data_headers`.
|
|
346
|
+
|
|
347
|
+
Args:
|
|
348
|
+
from_result (ExecutionResult): The result whose headers will be accumulated.
|
|
349
|
+
from_dim (int): The dimension index.
|
|
350
|
+
"""
|
|
351
|
+
|
|
352
|
+
if self.data_headers[from_dim] is None:
|
|
353
|
+
self.data_headers[from_dim] = from_result.get_all_headers(dim=from_dim)
|
|
354
|
+
else:
|
|
355
|
+
for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
|
|
356
|
+
cast(_DataHeaders, self.data_headers[from_dim])[idx].extend(headers)
|
|
357
|
+
|
|
164
358
|
def result(self) -> _DataWithHeaders:
|
|
165
359
|
"""
|
|
166
360
|
Returns the data with headers.
|
|
@@ -176,6 +370,55 @@ class _AccumulatedData:
|
|
|
176
370
|
)
|
|
177
371
|
|
|
178
372
|
|
|
373
|
+
@define
|
|
374
|
+
class _OptimizedAccumulatedData(_AbstractAccumulatedData):
|
|
375
|
+
"""
|
|
376
|
+
Implementation of _AbstractAccumulatedData that stores headers in _HeaderContainer objects,
|
|
377
|
+
which is used when optimized data extraction is used.
|
|
378
|
+
|
|
379
|
+
This implementation is more memory efficient than _AccumulatedData.
|
|
380
|
+
"""
|
|
381
|
+
|
|
382
|
+
def accumulate_headers(self, from_result: ExecutionResult, from_dim: int) -> None:
|
|
383
|
+
"""
|
|
384
|
+
Accumulate headers for a particular dimension of a result into the provided `data_headers` array at the index
|
|
385
|
+
matching the dimension index.
|
|
386
|
+
|
|
387
|
+
This will mutate the `data_headers`.
|
|
388
|
+
|
|
389
|
+
Args:
|
|
390
|
+
from_result (ExecutionResult): The result whose headers will be accumulated.
|
|
391
|
+
from_dim (int): The dimension index.
|
|
392
|
+
"""
|
|
393
|
+
|
|
394
|
+
if containers := self.data_headers[from_dim]:
|
|
395
|
+
for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
|
|
396
|
+
containers[idx].extend(headers)
|
|
397
|
+
else:
|
|
398
|
+
self.data_headers[from_dim] = []
|
|
399
|
+
containers = []
|
|
400
|
+
for idx, headers in enumerate(from_result.get_all_headers(dim=from_dim)):
|
|
401
|
+
hc = _HeaderContainer()
|
|
402
|
+
hc.extend(headers)
|
|
403
|
+
containers.append(hc)
|
|
404
|
+
self.data_headers[from_dim] = containers
|
|
405
|
+
|
|
406
|
+
def result(self) -> _DataWithHeaders:
|
|
407
|
+
"""
|
|
408
|
+
Returns the data with headers.
|
|
409
|
+
|
|
410
|
+
Returns:
|
|
411
|
+
_DataWithHeaders: The data, data headers, grand totals and grand total headers.
|
|
412
|
+
"""
|
|
413
|
+
|
|
414
|
+
return _DataWithHeaders(
|
|
415
|
+
data=self.data,
|
|
416
|
+
data_headers=(cast(_DataHeaderContainers, self.data_headers[0]), self.data_headers[1]),
|
|
417
|
+
grand_totals=(self.grand_totals[0], self.grand_totals[1]),
|
|
418
|
+
grand_total_headers=(self.grand_totals_headers[0], self.grand_totals_headers[1]),
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
|
|
179
422
|
@define
|
|
180
423
|
class DataFrameMetadata:
|
|
181
424
|
"""
|
|
@@ -194,11 +437,13 @@ class DataFrameMetadata:
|
|
|
194
437
|
| AVG | 150
|
|
195
438
|
SUM | | 450
|
|
196
439
|
|
|
440
|
+
column_totals_indexes: Similar to row_totals_indexes but for column headers.
|
|
197
441
|
execution_response: An instance of BareExecutionResponse representing the
|
|
198
442
|
execution response.
|
|
199
443
|
"""
|
|
200
444
|
|
|
201
445
|
row_totals_indexes: list[list[int]]
|
|
446
|
+
column_totals_indexes: list[list[int]]
|
|
202
447
|
execution_response: BareExecutionResponse
|
|
203
448
|
primary_labels_from_index: dict[int, dict[str, str]]
|
|
204
449
|
primary_labels_from_columns: dict[int, dict[str, str]]
|
|
@@ -206,27 +451,36 @@ class DataFrameMetadata:
|
|
|
206
451
|
@classmethod
|
|
207
452
|
def from_data(
|
|
208
453
|
cls,
|
|
209
|
-
headers:
|
|
454
|
+
headers: _HeadersByAxis,
|
|
210
455
|
execution_response: BareExecutionResponse,
|
|
211
456
|
primary_labels_from_index: dict[int, dict[str, str]],
|
|
212
457
|
primary_labels_from_columns: dict[int, dict[str, str]],
|
|
213
458
|
) -> "DataFrameMetadata":
|
|
214
459
|
"""This method constructs a DataFrameMetadata object from data headers and an execution response.
|
|
215
460
|
|
|
216
|
-
Args: headers (
|
|
461
|
+
Args: headers (_HeadersByAxis):
|
|
217
462
|
A tuple containing data headers. execution_response (BareExecutionResponse): An ExecutionResponse object.
|
|
218
463
|
|
|
219
464
|
Returns: DataFrameMetadata: An initialized DataFrameMetadata object."""
|
|
220
|
-
row_totals_indexes = [
|
|
221
|
-
|
|
222
|
-
]
|
|
465
|
+
row_totals_indexes = cls._get_totals_indexes(headers[0])
|
|
466
|
+
column_totals_indexes = cls._get_totals_indexes(headers[1])
|
|
223
467
|
return cls(
|
|
224
468
|
row_totals_indexes=row_totals_indexes,
|
|
469
|
+
column_totals_indexes=column_totals_indexes,
|
|
225
470
|
execution_response=execution_response,
|
|
226
471
|
primary_labels_from_index=primary_labels_from_index,
|
|
227
472
|
primary_labels_from_columns=primary_labels_from_columns,
|
|
228
473
|
)
|
|
229
474
|
|
|
475
|
+
@staticmethod
|
|
476
|
+
def _get_totals_indexes(headers: Optional[Any]) -> list[list[int]]:
|
|
477
|
+
if headers is None:
|
|
478
|
+
return []
|
|
479
|
+
return [
|
|
480
|
+
[idx for idx, hdr in enumerate(dim) if hdr is not None and hdr.get("totalHeader") is not None]
|
|
481
|
+
for dim in headers
|
|
482
|
+
]
|
|
483
|
+
|
|
230
484
|
|
|
231
485
|
def _read_complete_execution_result(
|
|
232
486
|
execution_response: BareExecutionResponse,
|
|
@@ -234,6 +488,7 @@ def _read_complete_execution_result(
|
|
|
234
488
|
result_size_dimensions_limits: ResultSizeDimensions,
|
|
235
489
|
result_size_bytes_limit: Optional[int] = None,
|
|
236
490
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
491
|
+
optimized: bool = False,
|
|
237
492
|
) -> _DataWithHeaders:
|
|
238
493
|
"""
|
|
239
494
|
Extracts all data and headers for an execution result. This does page around the execution result to extract
|
|
@@ -245,6 +500,10 @@ def _read_complete_execution_result(
|
|
|
245
500
|
result_size_dimensions_limits (ResultSizeDimensions): Limits for result size dimensions.
|
|
246
501
|
result_size_bytes_limit (Optional[int], optional): Limit for result size in bytes. Defaults to None.
|
|
247
502
|
page_size (int, optional): Page size to use when reading data. Defaults to _DEFAULT_PAGE_SIZE.
|
|
503
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
504
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
505
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
506
|
+
which can significantly reduce memory usage.
|
|
248
507
|
|
|
249
508
|
Returns:
|
|
250
509
|
_DataWithHeaders: All the data and headers from the execution result.
|
|
@@ -252,10 +511,10 @@ def _read_complete_execution_result(
|
|
|
252
511
|
num_dims = len(execution_response.dimensions)
|
|
253
512
|
offset = [0] * num_dims
|
|
254
513
|
limit = [page_size] * num_dims
|
|
255
|
-
acc = _AccumulatedData()
|
|
256
514
|
|
|
257
|
-
|
|
515
|
+
acc = _OptimizedAccumulatedData() if optimized else _AccumulatedData()
|
|
258
516
|
|
|
517
|
+
result_size_limits_checked = False
|
|
259
518
|
while True:
|
|
260
519
|
# top-level loop pages through the first dimension;
|
|
261
520
|
#
|
|
@@ -303,7 +562,6 @@ def _read_complete_execution_result(
|
|
|
303
562
|
break
|
|
304
563
|
|
|
305
564
|
offset = [result.next_page_start(dim=0), 0] if num_dims > 1 else [result.next_page_start(dim=0)]
|
|
306
|
-
|
|
307
565
|
return acc.result()
|
|
308
566
|
|
|
309
567
|
|
|
@@ -339,14 +597,14 @@ def _create_header_mapper(
|
|
|
339
597
|
attribute_labels = label_overrides.get("labels", {})
|
|
340
598
|
measure_labels = label_overrides.get("metrics", {})
|
|
341
599
|
|
|
342
|
-
def _mapper(header:
|
|
600
|
+
def _mapper(header: Union[dict, _Header, None], header_idx: Optional[int]) -> Optional[str]:
|
|
343
601
|
label = None
|
|
344
602
|
if header is None:
|
|
345
603
|
pass
|
|
346
|
-
elif "attributeHeader"
|
|
347
|
-
if "labelValue" in
|
|
348
|
-
label_value =
|
|
349
|
-
primary_label_value =
|
|
604
|
+
elif attribute_header := header.get("attributeHeader"):
|
|
605
|
+
if "labelValue" in attribute_header:
|
|
606
|
+
label_value = attribute_header["labelValue"]
|
|
607
|
+
primary_label_value = attribute_header["primaryLabelValue"]
|
|
350
608
|
label = primary_label_value if use_primary_labels_in_attributes else label_value
|
|
351
609
|
if header_idx is not None:
|
|
352
610
|
if header_idx in primary_attribute_labels_mapping:
|
|
@@ -359,17 +617,18 @@ def _create_header_mapper(
|
|
|
359
617
|
# Excel formatter apply call failure
|
|
360
618
|
if label is None:
|
|
361
619
|
label = " "
|
|
362
|
-
elif "labelName" in
|
|
363
|
-
attr_local_id =
|
|
620
|
+
elif "labelName" in attribute_header:
|
|
621
|
+
attr_local_id = attribute_header["localIdentifier"]
|
|
364
622
|
if use_local_ids_in_headers:
|
|
365
623
|
label = attr_local_id
|
|
366
624
|
else:
|
|
367
625
|
if attr_local_id in attribute_labels:
|
|
368
626
|
label = attribute_labels[attr_local_id]["title"]
|
|
369
627
|
else:
|
|
370
|
-
label =
|
|
371
|
-
|
|
372
|
-
|
|
628
|
+
label = attribute_header["labelName"]
|
|
629
|
+
|
|
630
|
+
elif (measure_header := header.get("measureHeader")) and header_idx is not None:
|
|
631
|
+
measure_idx = measure_header["measureIndex"]
|
|
373
632
|
measure_descriptor = dim_descriptor["headers"][header_idx]["measureGroupHeaders"][measure_idx]
|
|
374
633
|
|
|
375
634
|
if use_local_ids_in_headers:
|
|
@@ -381,8 +640,9 @@ def _create_header_mapper(
|
|
|
381
640
|
label = measure_descriptor["name"]
|
|
382
641
|
else:
|
|
383
642
|
label = measure_descriptor["localIdentifier"]
|
|
384
|
-
|
|
385
|
-
|
|
643
|
+
|
|
644
|
+
elif total_header := header.get("totalHeader"):
|
|
645
|
+
label = total_header["function"]
|
|
386
646
|
return label
|
|
387
647
|
|
|
388
648
|
return _mapper
|
|
@@ -390,7 +650,7 @@ def _create_header_mapper(
|
|
|
390
650
|
|
|
391
651
|
def _headers_to_index(
|
|
392
652
|
dim_idx: int,
|
|
393
|
-
headers:
|
|
653
|
+
headers: _HeadersByAxis,
|
|
394
654
|
response: BareExecutionResponse,
|
|
395
655
|
label_overrides: LabelOverrides,
|
|
396
656
|
use_local_ids_in_headers: bool = False,
|
|
@@ -432,7 +692,7 @@ def _headers_to_index(
|
|
|
432
692
|
return pandas.MultiIndex.from_arrays(
|
|
433
693
|
[
|
|
434
694
|
tuple(mapper(header, header_idx) for header in header_group)
|
|
435
|
-
for header_idx, header_group in enumerate(cast(
|
|
695
|
+
for header_idx, header_group in enumerate(cast(list, headers[dim_idx]))
|
|
436
696
|
],
|
|
437
697
|
names=[mapper(dim_header, None) for dim_header in (response.dimensions[dim_idx]["headers"])],
|
|
438
698
|
), primary_attribute_labels_mapping
|
|
@@ -465,17 +725,17 @@ def _merge_grand_totals_into_data(extract: _DataWithHeaders) -> Union[_DataArray
|
|
|
465
725
|
return data
|
|
466
726
|
|
|
467
727
|
|
|
468
|
-
def _merge_grand_total_headers_into_headers(extract: _DataWithHeaders) ->
|
|
728
|
+
def _merge_grand_total_headers_into_headers(extract: _DataWithHeaders) -> _HeadersByAxis:
|
|
469
729
|
"""Merges grand total headers into data headers. This function will mutate the extracted data.
|
|
470
730
|
|
|
471
731
|
Args:
|
|
472
732
|
extract (_DataWithHeaders): The data along with its headers that need to be merged.
|
|
473
733
|
|
|
474
734
|
Returns:
|
|
475
|
-
|
|
735
|
+
_HeadersByAxis:
|
|
476
736
|
A tuple containing the modified data headers and the grand total headers if present.
|
|
477
737
|
"""
|
|
478
|
-
headers:
|
|
738
|
+
headers: _HeadersByAxis = extract.data_headers
|
|
479
739
|
|
|
480
740
|
for dim_idx, grand_total_headers in enumerate(extract.grand_total_headers):
|
|
481
741
|
if grand_total_headers is None:
|
|
@@ -496,6 +756,7 @@ def convert_execution_response_to_dataframe(
|
|
|
496
756
|
use_local_ids_in_headers: bool = False,
|
|
497
757
|
use_primary_labels_in_attributes: bool = False,
|
|
498
758
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
759
|
+
optimized: bool = False,
|
|
499
760
|
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
|
|
500
761
|
"""
|
|
501
762
|
Converts execution result to a pandas dataframe, maintaining the dimensionality of the result.
|
|
@@ -511,6 +772,10 @@ def convert_execution_response_to_dataframe(
|
|
|
511
772
|
use_primary_labels_in_attributes (bool, default=False): Use primary labels in attributes if True, else use
|
|
512
773
|
default settings.
|
|
513
774
|
page_size (int, default=_DEFAULT_PAGE_SIZE): Size of the page.
|
|
775
|
+
optimized (bool, default=False): Use memory optimized accumulator if True; by default, the accumulator stores
|
|
776
|
+
headers in memory as lists of dicts, which can consume a lot of memory for large results.
|
|
777
|
+
Optimized accumulator stores only unique values and story only reference to them in the list,
|
|
778
|
+
which can significantly reduce memory usage.
|
|
514
779
|
|
|
515
780
|
Returns:
|
|
516
781
|
Tuple[pandas.DataFrame, DataFrameMetadata]: A tuple containing the created dataframe and its metadata.
|
|
@@ -521,7 +786,9 @@ def convert_execution_response_to_dataframe(
|
|
|
521
786
|
result_size_dimensions_limits=result_size_dimensions_limits,
|
|
522
787
|
result_size_bytes_limit=result_size_bytes_limit,
|
|
523
788
|
page_size=page_size,
|
|
789
|
+
optimized=optimized,
|
|
524
790
|
)
|
|
791
|
+
|
|
525
792
|
full_data = _merge_grand_totals_into_data(extract)
|
|
526
793
|
full_headers = _merge_grand_total_headers_into_headers(extract)
|
|
527
794
|
|
|
@@ -1,42 +1,28 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gooddata-pandas
|
|
3
|
-
Version: 1.47.0
|
|
3
|
+
Version: 1.55.1.dev2
|
|
4
4
|
Summary: GoodData Cloud to pandas
|
|
5
|
-
|
|
6
|
-
Author-email: support@gooddata.com
|
|
7
|
-
License: MIT
|
|
8
|
-
Project-URL: Documentation, https://gooddata-pandas.readthedocs.io/en/v1.47.0
|
|
5
|
+
Project-URL: Documentation, https://gooddata-pandas.readthedocs.io/en/v1.55.1.dev2
|
|
9
6
|
Project-URL: Source, https://github.com/gooddata/gooddata-python-sdk
|
|
10
|
-
|
|
7
|
+
Author-email: GoodData <support@gooddata.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE.txt
|
|
10
|
+
Keywords: analytics,business,cloud,data,data_frame,frame,gooddata,headless,headless-bi,intelligence,layer,metrics,native,pandas,semantic,series,sql
|
|
11
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
12
12
|
Classifier: Environment :: Console
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
15
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
16
|
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
18
|
Classifier: Topic :: Database
|
|
20
19
|
Classifier: Topic :: Scientific/Engineering
|
|
21
20
|
Classifier: Topic :: Software Development
|
|
22
21
|
Classifier: Typing :: Typed
|
|
23
|
-
Requires-Python: >=3.9
|
|
24
|
-
|
|
25
|
-
License-File: LICENSE.txt
|
|
26
|
-
Requires-Dist: gooddata-sdk~=1.47.0
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: gooddata-sdk~=1.55.1.dev2
|
|
27
24
|
Requires-Dist: pandas<3.0.0,>=2.0.0
|
|
28
|
-
|
|
29
|
-
Dynamic: author-email
|
|
30
|
-
Dynamic: classifier
|
|
31
|
-
Dynamic: description
|
|
32
|
-
Dynamic: description-content-type
|
|
33
|
-
Dynamic: keywords
|
|
34
|
-
Dynamic: license
|
|
35
|
-
Dynamic: license-file
|
|
36
|
-
Dynamic: project-url
|
|
37
|
-
Dynamic: requires-dist
|
|
38
|
-
Dynamic: requires-python
|
|
39
|
-
Dynamic: summary
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
40
26
|
|
|
41
27
|
# GoodData Pandas
|
|
42
28
|
|
|
@@ -50,7 +36,7 @@ See [DOCUMENTATION](https://gooddata-pandas.readthedocs.io/en/latest/) for more
|
|
|
50
36
|
- GoodData.CN installation; either running on your cloud
|
|
51
37
|
infrastructure or the free Community Edition running on your workstation
|
|
52
38
|
|
|
53
|
-
- Python 3.9 or newer
|
|
39
|
+
- Python 3.10 or newer
|
|
54
40
|
|
|
55
41
|
## Installation
|
|
56
42
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
gooddata_pandas/__init__.py,sha256=Ta3qIIDq7kBRUsYSV3aC69AQBFvFvhtWDQucgP-l88w,297
|
|
2
|
+
gooddata_pandas/_version.py,sha256=960vTs6l7xsN2BOXWCxOc4PSKdzzKhnNEPTMnmMTCQs,119
|
|
3
|
+
gooddata_pandas/data_access.py,sha256=VPFjlOVH4dsQvbspEkT6UG_g3yA9sE5g8OLSrqKaeH4,20129
|
|
4
|
+
gooddata_pandas/dataframe.py,sha256=_riBCtkV7zJZ8YlvacPfpxs0gENMgV0W4nNii6Ei-2A,18074
|
|
5
|
+
gooddata_pandas/good_pandas.py,sha256=2GzISAD9J2CQy3KM8kuelPazOFfjA5g4v_p3TyINBW8,3474
|
|
6
|
+
gooddata_pandas/py.typed,sha256=u_MS29sadlaIqGRPYFjWml5u0gQnoQfvbsf9pu3TZJU,94
|
|
7
|
+
gooddata_pandas/result_convertor.py,sha256=Tv6Ee3JxxFbKoPmXz0R0fl7x7HnI0-5pHycFu-QsFus,34928
|
|
8
|
+
gooddata_pandas/series.py,sha256=ELBSg1jKy-AYrtXErpNhsmQ0Zd6mP1M6FNS6bGgNPyI,6780
|
|
9
|
+
gooddata_pandas/utils.py,sha256=JhWs0WYqg-9o3aWRP21ERFAxCKoT5oNKQ5mKlJh4uT4,8091
|
|
10
|
+
gooddata_pandas-1.55.1.dev2.dist-info/METADATA,sha256=fnYJMMShmWqrTrJ9zKsVctAz2rRNT4ayXvGtW0DLvXQ,2847
|
|
11
|
+
gooddata_pandas-1.55.1.dev2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
+
gooddata_pandas-1.55.1.dev2.dist-info/licenses/LICENSE.txt,sha256=3RjzQk8y9HG1_LgqvbEqWZKJnTQGOO1cpzYzBc13Myk,149825
|
|
13
|
+
gooddata_pandas-1.55.1.dev2.dist-info/RECORD,,
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
gooddata_pandas/__init__.py,sha256=Ta3qIIDq7kBRUsYSV3aC69AQBFvFvhtWDQucgP-l88w,297
|
|
2
|
-
gooddata_pandas/_version.py,sha256=960vTs6l7xsN2BOXWCxOc4PSKdzzKhnNEPTMnmMTCQs,119
|
|
3
|
-
gooddata_pandas/data_access.py,sha256=VPFjlOVH4dsQvbspEkT6UG_g3yA9sE5g8OLSrqKaeH4,20129
|
|
4
|
-
gooddata_pandas/dataframe.py,sha256=EsOgO8O42JBg1as0RZVwbeVOlGlENpkEsvlL-Xi5Jsg,16679
|
|
5
|
-
gooddata_pandas/good_pandas.py,sha256=2GzISAD9J2CQy3KM8kuelPazOFfjA5g4v_p3TyINBW8,3474
|
|
6
|
-
gooddata_pandas/py.typed,sha256=u_MS29sadlaIqGRPYFjWml5u0gQnoQfvbsf9pu3TZJU,94
|
|
7
|
-
gooddata_pandas/result_convertor.py,sha256=r7uFrjeM6cxMy08YcS3LywF1iUPSyEyG3BAddh0DkIQ,25807
|
|
8
|
-
gooddata_pandas/series.py,sha256=ELBSg1jKy-AYrtXErpNhsmQ0Zd6mP1M6FNS6bGgNPyI,6780
|
|
9
|
-
gooddata_pandas/utils.py,sha256=JhWs0WYqg-9o3aWRP21ERFAxCKoT5oNKQ5mKlJh4uT4,8091
|
|
10
|
-
gooddata_pandas-1.47.0.dist-info/licenses/LICENSE.txt,sha256=3RjzQk8y9HG1_LgqvbEqWZKJnTQGOO1cpzYzBc13Myk,149825
|
|
11
|
-
gooddata_pandas-1.47.0.dist-info/METADATA,sha256=TW89fiNcfnRNuTio6dwsR7JnZGFnpXpiCO-m8bVz5iI,3133
|
|
12
|
-
gooddata_pandas-1.47.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
gooddata_pandas-1.47.0.dist-info/top_level.txt,sha256=B7K_WFxlxplJbEbv5Mf0YhX74dbOpTPgDX-W6I7CssI,16
|
|
14
|
-
gooddata_pandas-1.47.0.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
gooddata_pandas
|
{gooddata_pandas-1.47.0.dist-info → gooddata_pandas-1.55.1.dev2.dist-info}/licenses/LICENSE.txt
RENAMED
|
File without changes
|