PyPI - influxdb3-python - Versions diffs - 0.6.1__tar.gz → 0.7.0__tar.gz - Mend

influxdb3-python 0.6.1__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: influxdb3-python
-Version: 0.6.1
+Version: 0.7.0
 Summary: Community Python client for InfluxDB 3.0
 Home-page: https://github.com/InfluxCommunity/influxdb3-python
 Author: InfluxData

{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb3_python.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: influxdb3-python
-Version: 0.6.1
+Version: 0.7.0
 Summary: Community Python client for InfluxDB 3.0
 Home-page: https://github.com/InfluxCommunity/influxdb3-python
 Author: InfluxData

{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb3_python.egg-info/SOURCES.txt RENAMED Viewed

@@ -34,6 +34,7 @@ influxdb_client_3/write_client/client/util/multiprocessing_helper.py
 influxdb_client_3/write_client/client/write/__init__.py
 influxdb_client_3/write_client/client/write/dataframe_serializer.py
 influxdb_client_3/write_client/client/write/point.py
+influxdb_client_3/write_client/client/write/polars_dataframe_serializer.py
 influxdb_client_3/write_client/client/write/retry.py
 influxdb_client_3/write_client/domain/__init__.py
 influxdb_client_3/write_client/domain/write_precision.py
@@ -50,4 +51,6 @@ tests/test_influxdb_client_3.py
 tests/test_influxdb_client_3_integration.py
 tests/test_merge_options.py
 tests/test_point.py
-tests/test_query.py
+tests/test_polars_dataframe_serializer.py
+tests/test_query.py
+tests/test_write_file.py

{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/__init__.py RENAMED Viewed

@@ -1,5 +1,4 @@
 import urllib.parse
 import pyarrow as pa
 import importlib.util

{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/version.py RENAMED Viewed

@@ -1,4 +1,4 @@
 """Version of the Client that is used in User-Agent header."""
-VERSION = '0.6.1'
+VERSION = '0.7.0'
 USER_AGENT = f'influxdb3-python/{VERSION}'

{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/_base.py RENAMED Viewed

@@ -7,8 +7,7 @@ import logging
 import os
 from typing import Iterable
-from influxdb_client_3.write_client.client.write.dataframe_serializer import DataframeSerializer, \
-    PolarsDataframeSerializer
+from influxdb_client_3.write_client.client.write.dataframe_serializer import DataframeSerializer
 from influxdb_client_3.write_client.configuration import Configuration
 from influxdb_client_3.write_client.rest import _UTF_8_encoding
 from influxdb_client_3.write_client.service.write_service import WriteService
@@ -249,6 +248,7 @@ class _BaseWriteApi(object):
             self._serialize(Point.from_dict(record, write_precision=write_precision, **kwargs),
                             write_precision, payload, **kwargs)
         elif 'polars' in str(type(record)):
+            from influxdb_client_3.write_client.client.write.dataframe_serializer import PolarsDataframeSerializer
             serializer = PolarsDataframeSerializer(record, self._point_settings, write_precision, **kwargs)
             self._serialize(serializer.serialize(), write_precision, payload, **kwargs)

{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/write/dataframe_serializer.py RENAMED Viewed

@@ -132,7 +132,7 @@ class DataframeSerializer:
         keys = []
         # tags holds a list of tag f-string segments ordered alphabetically by tag key.
         tags = []
-        # fields holds a list of field f-string segments  ordered alphebetically by field key
+        # fields holds a list of field f-string segments ordered alphabetically by field key
         fields = []
         # field_indexes holds the index into each row of all the fields.
         field_indexes = []
@@ -160,6 +160,11 @@ class DataframeSerializer:
         # null_columns has a bool value for each column holding
         # whether that column contains any null (NaN or None) values.
         null_columns = data_frame.isnull().any()
+        # inf_columns has a bool value for each column holding
+        # whether that column contains any Inf values.
+        inf_columns = data_frame.isin([np.inf, -np.inf]).any()
         timestamp_index = 0
         # Iterate through the columns building up the expression for each column.
@@ -175,9 +180,10 @@ class DataframeSerializer:
             if key in data_frame_tag_columns:
                 # This column is a tag column.
-                if null_columns.iloc[index]:
+                if null_columns.iloc[index] or inf_columns.iloc[index]:
                     key_value = f"""{{
-                            '' if {val_format} == '' or pd.isna({val_format}) else
+                            '' if {val_format} == '' or pd.isna({val_format}) or
+                            ({inf_columns.iloc[index]} and np.isinf({val_format})) else
                             f',{key_format}={{str({val_format}).translate(_ESCAPE_STRING)}}'
                         }}"""
                 else:
@@ -199,16 +205,17 @@ class DataframeSerializer:
             if (issubclass(value.type, np.integer) or issubclass(value.type, np.floating) or
                     issubclass(value.type, np.bool_)):
                 suffix = 'i' if issubclass(value.type, np.integer) else ''
-                if null_columns.iloc[index]:
+                if null_columns.iloc[index] or inf_columns.iloc[index]:
                     field_value = (
-                        f"""{{"" if pd.isna({val_format}) else f"{sep}{key_format}={{{val_format}}}{suffix}"}}"""
+                        f"""{{"" if pd.isna({val_format}) or ({inf_columns.iloc[index]} and np.isinf({val_format})) else
+                        f"{sep}{key_format}={{{val_format}}}{suffix}"}}"""
                     )
                 else:
                     field_value = f'{sep}{key_format}={{{val_format}}}{suffix}'
             else:
-                if null_columns.iloc[index]:
+                if null_columns.iloc[index] or inf_columns.iloc[index]:
                     field_value = f"""{{
-                            '' if pd.isna({val_format}) else
+                            '' if pd.isna({val_format}) or ({inf_columns.iloc[index]} and np.isinf({val_format})) else
                             f'{sep}{key_format}="{{str({val_format}).translate(_ESCAPE_STRING)}}"'
                         }}"""
                 else:
@@ -234,11 +241,12 @@ class DataframeSerializer:
             '_ESCAPE_STRING': _ESCAPE_STRING,
             'keys': keys,
             'pd': pd,
+            'np': np,
         })
         for k, v in dict(data_frame.dtypes).items():
             if k in data_frame_tag_columns:
-                data_frame[k].replace('', np.nan, inplace=True)
+                data_frame[k] = data_frame[k].apply(lambda x: np.nan if x == '' else x)
         self.data_frame = data_frame
         self.f = f
@@ -284,137 +292,6 @@ class DataframeSerializer:
         return self.number_of_chunks
-class PolarsDataframeSerializer:
-    """Serialize DataFrame into LineProtocols."""
-    def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, chunk_size: int = None,
-                 **kwargs) -> None:
-        """
-        Init serializer.
-        :param data_frame: Polars DataFrame to serialize
-        :param point_settings: Default Tags
-        :param precision: The precision for the unix timestamps within the body line-protocol.
-        :param chunk_size: The size of chunk for serializing into chunks.
-        :key data_frame_measurement_name: name of measurement for writing Polars DataFrame
-        :key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
-        :key data_frame_timestamp_column: name of DataFrame column which contains a timestamp.
-        :key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column
-        """
-        self.data_frame = data_frame
-        self.point_settings = point_settings
-        self.precision = precision
-        self.chunk_size = chunk_size
-        self.measurement_name = kwargs.get("data_frame_measurement_name", "measurement")
-        self.tag_columns = kwargs.get("data_frame_tag_columns", [])
-        self.timestamp_column = kwargs.get("data_frame_timestamp_column", None)
-        self.timestamp_timezone = kwargs.get("data_frame_timestamp_timezone", None)
-        self.column_indices = {name: index for index, name in enumerate(data_frame.columns)}
-        if self.timestamp_column is None or self.timestamp_column not in self.column_indices:
-            raise ValueError(
-                f"Timestamp column {self.timestamp_column} not found in DataFrame. Please define a valid timestamp "
-                f"column.")
-        #
-        # prepare chunks
-        #
-        if chunk_size is not None:
-            self.number_of_chunks = int(math.ceil(len(data_frame) / float(chunk_size)))
-            self.chunk_size = chunk_size
-        else:
-            self.number_of_chunks = None
-    def escape_key(self, value):
-        return str(value).translate(_ESCAPE_KEY)
-    def escape_value(self, value):
-        return str(value).translate(_ESCAPE_STRING)
-    def to_line_protocol(self, row):
-        # Filter out None or empty values for tags
-        tags = ""
-        tags = ",".join(
-            f'{self.escape_key(col)}={self.escape_key(row[self.column_indices[col]])}'
-            for col in self.tag_columns
-            if row[self.column_indices[col]] is not None and row[self.column_indices[col]] != ""
-        )
-        if self.point_settings.defaultTags:
-            default_tags = ",".join(
-                f'{self.escape_key(key)}={self.escape_key(value)}'
-                for key, value in self.point_settings.defaultTags.items()
-            )
-            # Ensure there's a comma between existing tags and default tags if both are present
-            if tags and default_tags:
-                tags += ","
-            tags += default_tags
-        # add escape symbols for special characters to tags
-        fields = ",".join(
-            f"{col}=\"{self.escape_value(row[self.column_indices[col]])}\"" if isinstance(row[self.column_indices[col]],
-                                                                                          str)
-            else f"{col}={str(row[self.column_indices[col]]).lower()}" if isinstance(row[self.column_indices[col]],
-                                                                                     bool)  # Check for bool first
-            else f"{col}={row[self.column_indices[col]]}i" if isinstance(row[self.column_indices[col]], int)
-            else f"{col}={row[self.column_indices[col]]}"
-            for col in self.column_indices
-            if col not in self.tag_columns + [self.timestamp_column] and
-            row[self.column_indices[col]] is not None and row[self.column_indices[col]] != ""
-        )
-        # Access the Unix timestamp
-        timestamp = row[self.column_indices[self.timestamp_column]]
-        if tags != "":
-            line_protocol = f"{self.measurement_name},{tags} {fields} {timestamp}"
-        else:
-            line_protocol = f"{self.measurement_name} {fields} {timestamp}"
-        return line_protocol
-    def serialize(self, chunk_idx: int = None):
-        from ...extras import pl
-        df = self.data_frame
-        # Check if the timestamp column is already an integer
-        if df[self.timestamp_column].dtype in [pl.Int32, pl.Int64]:
-            # The timestamp column is already an integer, assuming it's in Unix format
-            pass
-        else:
-            # Convert timestamp to Unix timestamp based on specified precision
-            if self.precision in [None, 'ns']:
-                df = df.with_columns(
-                    pl.col(self.timestamp_column).dt.epoch(time_unit="ns").alias(self.timestamp_column))
-            elif self.precision == 'us':
-                df = df.with_columns(
-                    pl.col(self.timestamp_column).dt.epoch(time_unit="us").alias(self.timestamp_column))
-            elif self.precision == 'ms':
-                df = df.with_columns(
-                    pl.col(self.timestamp_column).dt.epoch(time_unit="ms").alias(self.timestamp_column))
-            elif self.precision == 's':
-                df = df.with_columns(pl.col(self.timestamp_column).dt.epoch(time_unit="s").alias(self.timestamp_column))
-            else:
-                raise ValueError(f"Unsupported precision: {self.precision}")
-        if chunk_idx is None:
-            chunk = df
-        else:
-            logger.debug("Serialize chunk %s/%s ...", chunk_idx + 1, self.number_of_chunks)
-            chunk = df[chunk_idx * self.chunk_size:(chunk_idx + 1) * self.chunk_size]
-        # Apply the UDF to each row
-        line_protocol_expr = chunk.apply(self.to_line_protocol, return_dtype=pl.Object)
-        lp = line_protocol_expr['map'].to_list()
-        return lp
 def data_frame_to_list_of_points(data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, **kwargs):
     """
     Serialize DataFrame into LineProtocols.
@@ -430,20 +307,3 @@ def data_frame_to_list_of_points(data_frame, point_settings, precision=DEFAULT_W
     :key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column - ``DataFrame``
     """  # noqa: E501
     return DataframeSerializer(data_frame, point_settings, precision, **kwargs).serialize()
-def polars_data_frame_to_list_of_points(data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, **kwargs):
-    """
-    Serialize DataFrame into LineProtocols.
-    :param data_frame: Pandas DataFrame to serialize
-    :param point_settings: Default Tags
-    :param precision: The precision for the unix timestamps within the body line-protocol.
-    :key data_frame_measurement_name: name of measurement for writing Pandas DataFrame
-    :key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
-    :key data_frame_timestamp_column: name of DataFrame column which contains a timestamp. The column can be defined as a :class:`~str` value
-                                      formatted as `2018-10-26`, `2018-10-26 12:00`, `2018-10-26 12:00:00-05:00`
-                                      or other formats and types supported by `pandas.to_datetime <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html#pandas.to_datetime>`_ - ``DataFrame``
-    :key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column - ``DataFrame``
-    """  # noqa: E501
-    return PolarsDataframeSerializer(data_frame, point_settings, precision, **kwargs).serialize()

influxdb3_python-0.7.0/influxdb_client_3/write_client/client/write/polars_dataframe_serializer.py ADDED Viewed

@@ -0,0 +1,160 @@
+"""
+Functions for serialize Polars DataFrame.
+Much of the code here is inspired by that in the aioinflux packet found here: https://github.com/gusutabopb/aioinflux
+"""
+import logging
+import math
+from influxdb_client_3.write_client.client.write.point import _ESCAPE_KEY, _ESCAPE_STRING, DEFAULT_WRITE_PRECISION
+logger = logging.getLogger('influxdb_client.client.write.polars_dataframe_serializer')
+class PolarsDataframeSerializer:
+    """Serialize DataFrame into LineProtocols."""
+    def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, chunk_size: int = None,
+                 **kwargs) -> None:
+        """
+        Init serializer.
+        :param data_frame: Polars DataFrame to serialize
+        :param point_settings: Default Tags
+        :param precision: The precision for the unix timestamps within the body line-protocol.
+        :param chunk_size: The size of chunk for serializing into chunks.
+        :key data_frame_measurement_name: name of measurement for writing Polars DataFrame
+        :key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
+        :key data_frame_timestamp_column: name of DataFrame column which contains a timestamp.
+        :key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column
+        """
+        self.data_frame = data_frame
+        self.point_settings = point_settings
+        self.precision = precision
+        self.chunk_size = chunk_size
+        self.measurement_name = kwargs.get("data_frame_measurement_name", "measurement")
+        self.tag_columns = kwargs.get("data_frame_tag_columns", [])
+        self.timestamp_column = kwargs.get("data_frame_timestamp_column", None)
+        self.timestamp_timezone = kwargs.get("data_frame_timestamp_timezone", None)
+        self.column_indices = {name: index for index, name in enumerate(data_frame.columns)}
+        if self.timestamp_column is None or self.timestamp_column not in self.column_indices:
+            raise ValueError(
+                f"Timestamp column {self.timestamp_column} not found in DataFrame. Please define a valid timestamp "
+                f"column.")
+        #
+        # prepare chunks
+        #
+        if chunk_size is not None:
+            self.number_of_chunks = int(math.ceil(len(data_frame) / float(chunk_size)))
+            self.chunk_size = chunk_size
+        else:
+            self.number_of_chunks = None
+    def escape_key(self, value):
+        return str(value).translate(_ESCAPE_KEY)
+    def escape_value(self, value):
+        return str(value).translate(_ESCAPE_STRING)
+    def to_line_protocol(self, row):
+        # Filter out None or empty values for tags
+        tags = ""
+        tags = ",".join(
+            f'{self.escape_key(col)}={self.escape_key(row[self.column_indices[col]])}'
+            for col in self.tag_columns
+            if row[self.column_indices[col]] is not None and row[self.column_indices[col]] != ""
+        )
+        if self.point_settings.defaultTags:
+            default_tags = ",".join(
+                f'{self.escape_key(key)}={self.escape_key(value)}'
+                for key, value in self.point_settings.defaultTags.items()
+            )
+            # Ensure there's a comma between existing tags and default tags if both are present
+            if tags and default_tags:
+                tags += ","
+            tags += default_tags
+        # add escape symbols for special characters to tags
+        fields = ",".join(
+            f"{col}=\"{self.escape_value(row[self.column_indices[col]])}\"" if isinstance(row[self.column_indices[col]],
+                                                                                          str)
+            else f"{col}={str(row[self.column_indices[col]]).lower()}" if isinstance(row[self.column_indices[col]],
+                                                                                     bool)  # Check for bool first
+            else f"{col}={row[self.column_indices[col]]}i" if isinstance(row[self.column_indices[col]], int)
+            else f"{col}={row[self.column_indices[col]]}"
+            for col in self.column_indices
+            if col not in self.tag_columns + [self.timestamp_column] and
+            row[self.column_indices[col]] is not None and row[self.column_indices[col]] != ""
+        )
+        # Access the Unix timestamp
+        timestamp = row[self.column_indices[self.timestamp_column]]
+        if tags != "":
+            line_protocol = f"{self.measurement_name},{tags} {fields} {timestamp}"
+        else:
+            line_protocol = f"{self.measurement_name} {fields} {timestamp}"
+        return line_protocol
+    def serialize(self, chunk_idx: int = None):
+        import polars as pl
+        df = self.data_frame
+        # Check if the timestamp column is already an integer
+        if df[self.timestamp_column].dtype in [pl.Int32, pl.Int64]:
+            # The timestamp column is already an integer, assuming it's in Unix format
+            pass
+        else:
+            # Convert timestamp to Unix timestamp based on specified precision
+            if self.precision in [None, 'ns']:
+                df = df.with_columns(
+                    pl.col(self.timestamp_column).dt.epoch(time_unit="ns").alias(self.timestamp_column))
+            elif self.precision == 'us':
+                df = df.with_columns(
+                    pl.col(self.timestamp_column).dt.epoch(time_unit="us").alias(self.timestamp_column))
+            elif self.precision == 'ms':
+                df = df.with_columns(
+                    pl.col(self.timestamp_column).dt.epoch(time_unit="ms").alias(self.timestamp_column))
+            elif self.precision == 's':
+                df = df.with_columns(pl.col(self.timestamp_column).dt.epoch(time_unit="s").alias(self.timestamp_column))
+            else:
+                raise ValueError(f"Unsupported precision: {self.precision}")
+        if chunk_idx is None:
+            chunk = df
+        else:
+            logger.debug("Serialize chunk %s/%s ...", chunk_idx + 1, self.number_of_chunks)
+            chunk = df[chunk_idx * self.chunk_size:(chunk_idx + 1) * self.chunk_size]
+        # Apply the UDF to each row
+        line_protocol_expr = chunk.map_rows(self.to_line_protocol, return_dtype=pl.Object)
+        lp = line_protocol_expr['map'].to_list()
+        return lp
+def polars_data_frame_to_list_of_points(data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, **kwargs):
+    """
+    Serialize DataFrame into LineProtocols.
+    :param data_frame: Pandas DataFrame to serialize
+    :param point_settings: Default Tags
+    :param precision: The precision for the unix timestamps within the body line-protocol.
+    :key data_frame_measurement_name: name of measurement for writing Pandas DataFrame
+    :key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
+    :key data_frame_timestamp_column: name of DataFrame column which contains a timestamp. The column can be defined as a :class:`~str` value
+                                      formatted as `2018-10-26`, `2018-10-26 12:00`, `2018-10-26 12:00:00-05:00`
+                                      or other formats and types supported by `pandas.to_datetime <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html#pandas.to_datetime>`_ - ``DataFrame``
+    :key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column - ``DataFrame``
+    """  # noqa: E501
+    return PolarsDataframeSerializer(data_frame, point_settings, precision, **kwargs).serialize()

{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/write_api.py RENAMED Viewed

@@ -19,8 +19,7 @@ from reactivex.subject import Subject
 from influxdb_client_3.write_client.domain import WritePrecision
 from influxdb_client_3.write_client.client._base import _BaseWriteApi, _HAS_DATACLASS
 from influxdb_client_3.write_client.client.util.helpers import get_org_query_param
-from influxdb_client_3.write_client.client.write.dataframe_serializer import (DataframeSerializer,
-                                                                              PolarsDataframeSerializer)
+from influxdb_client_3.write_client.client.write.dataframe_serializer import DataframeSerializer
 from influxdb_client_3.write_client.client.write.point import Point, DEFAULT_WRITE_PRECISION
 from influxdb_client_3.write_client.client.write.retry import WritesRetry
 from influxdb_client_3.write_client.rest import _UTF_8_encoding
@@ -462,6 +461,7 @@ You can use native asynchronous version of the client:
                                  precision, **kwargs)
         elif 'polars' in str(type(data)):
+            from influxdb_client_3.write_client.client.write.dataframe_serializer import PolarsDataframeSerializer
             serializer = PolarsDataframeSerializer(data,
                                                    self._point_settings, precision,
                                                    self._write_options.batch_size, **kwargs)

{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/extras.py RENAMED Viewed

@@ -10,9 +10,4 @@ try:
 except ModuleNotFoundError as err:
     raise ImportError(f"`data_frame` requires numpy which couldn't be imported due: {err}")
-try:
-    import polars as pl
-except ModuleNotFoundError as err:
-    raise ImportError(f"`polars_frame` requires polars which couldn't be imported due: {err}")
-__all__ = ['pd', 'np', 'pl']
+__all__ = ['pd', 'np']

influxdb3-python 0.6.1__tar.gz → 0.7.0__tar.gz

influxdb3-python 0.6.1__tar.gz → 0.7.0__tar.gz