influxdb3-python 0.6.1__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/PKG-INFO +1 -1
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb3_python.egg-info/PKG-INFO +1 -1
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb3_python.egg-info/SOURCES.txt +4 -1
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/__init__.py +0 -1
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/version.py +1 -1
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/_base.py +2 -2
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/write/dataframe_serializer.py +16 -156
- influxdb3_python-0.7.0/influxdb_client_3/write_client/client/write/polars_dataframe_serializer.py +160 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/write_api.py +2 -2
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/extras.py +1 -6
- influxdb3_python-0.7.0/tests/test_dataframe_serializer.py +515 -0
- influxdb3_python-0.7.0/tests/test_polars_dataframe_serializer.py +31 -0
- influxdb3_python-0.7.0/tests/test_write_file.py +66 -0
- influxdb3_python-0.6.1/tests/test_dataframe_serializer.py +0 -45
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/LICENSE +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/README.md +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb3_python.egg-info/dependency_links.txt +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb3_python.egg-info/requires.txt +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb3_python.egg-info/top_level.txt +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/py.typed +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/query/__init__.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/query/query_api.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/read_file.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/__init__.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/_sync/__init__.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/_sync/api_client.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/_sync/rest.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/__init__.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/exceptions.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/influxdb_client.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/logging_handler.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/util/__init__.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/util/date_utils.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/util/date_utils_pandas.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/util/helpers.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/util/multiprocessing_helper.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/warnings.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/write/__init__.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/write/point.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/write/retry.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/configuration.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/domain/__init__.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/domain/write_precision.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/rest.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/service/__init__.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/service/_base_service.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/service/signin_service.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/service/signout_service.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/service/write_service.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/setup.cfg +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/setup.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/tests/test_api_client.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/tests/test_date_helper.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/tests/test_deep_merge.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/tests/test_influxdb_client_3.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/tests/test_influxdb_client_3_integration.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/tests/test_merge_options.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/tests/test_point.py +0 -0
- {influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/tests/test_query.py +0 -0
|
@@ -34,6 +34,7 @@ influxdb_client_3/write_client/client/util/multiprocessing_helper.py
|
|
|
34
34
|
influxdb_client_3/write_client/client/write/__init__.py
|
|
35
35
|
influxdb_client_3/write_client/client/write/dataframe_serializer.py
|
|
36
36
|
influxdb_client_3/write_client/client/write/point.py
|
|
37
|
+
influxdb_client_3/write_client/client/write/polars_dataframe_serializer.py
|
|
37
38
|
influxdb_client_3/write_client/client/write/retry.py
|
|
38
39
|
influxdb_client_3/write_client/domain/__init__.py
|
|
39
40
|
influxdb_client_3/write_client/domain/write_precision.py
|
|
@@ -50,4 +51,6 @@ tests/test_influxdb_client_3.py
|
|
|
50
51
|
tests/test_influxdb_client_3_integration.py
|
|
51
52
|
tests/test_merge_options.py
|
|
52
53
|
tests/test_point.py
|
|
53
|
-
tests/
|
|
54
|
+
tests/test_polars_dataframe_serializer.py
|
|
55
|
+
tests/test_query.py
|
|
56
|
+
tests/test_write_file.py
|
{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/_base.py
RENAMED
|
@@ -7,8 +7,7 @@ import logging
|
|
|
7
7
|
import os
|
|
8
8
|
from typing import Iterable
|
|
9
9
|
|
|
10
|
-
from influxdb_client_3.write_client.client.write.dataframe_serializer import DataframeSerializer
|
|
11
|
-
PolarsDataframeSerializer
|
|
10
|
+
from influxdb_client_3.write_client.client.write.dataframe_serializer import DataframeSerializer
|
|
12
11
|
from influxdb_client_3.write_client.configuration import Configuration
|
|
13
12
|
from influxdb_client_3.write_client.rest import _UTF_8_encoding
|
|
14
13
|
from influxdb_client_3.write_client.service.write_service import WriteService
|
|
@@ -249,6 +248,7 @@ class _BaseWriteApi(object):
|
|
|
249
248
|
self._serialize(Point.from_dict(record, write_precision=write_precision, **kwargs),
|
|
250
249
|
write_precision, payload, **kwargs)
|
|
251
250
|
elif 'polars' in str(type(record)):
|
|
251
|
+
from influxdb_client_3.write_client.client.write.dataframe_serializer import PolarsDataframeSerializer
|
|
252
252
|
serializer = PolarsDataframeSerializer(record, self._point_settings, write_precision, **kwargs)
|
|
253
253
|
self._serialize(serializer.serialize(), write_precision, payload, **kwargs)
|
|
254
254
|
|
|
@@ -132,7 +132,7 @@ class DataframeSerializer:
|
|
|
132
132
|
keys = []
|
|
133
133
|
# tags holds a list of tag f-string segments ordered alphabetically by tag key.
|
|
134
134
|
tags = []
|
|
135
|
-
# fields holds a list of field f-string segments
|
|
135
|
+
# fields holds a list of field f-string segments ordered alphabetically by field key
|
|
136
136
|
fields = []
|
|
137
137
|
# field_indexes holds the index into each row of all the fields.
|
|
138
138
|
field_indexes = []
|
|
@@ -160,6 +160,11 @@ class DataframeSerializer:
|
|
|
160
160
|
# null_columns has a bool value for each column holding
|
|
161
161
|
# whether that column contains any null (NaN or None) values.
|
|
162
162
|
null_columns = data_frame.isnull().any()
|
|
163
|
+
|
|
164
|
+
# inf_columns has a bool value for each column holding
|
|
165
|
+
# whether that column contains any Inf values.
|
|
166
|
+
inf_columns = data_frame.isin([np.inf, -np.inf]).any()
|
|
167
|
+
|
|
163
168
|
timestamp_index = 0
|
|
164
169
|
|
|
165
170
|
# Iterate through the columns building up the expression for each column.
|
|
@@ -175,9 +180,10 @@ class DataframeSerializer:
|
|
|
175
180
|
|
|
176
181
|
if key in data_frame_tag_columns:
|
|
177
182
|
# This column is a tag column.
|
|
178
|
-
if null_columns.iloc[index]:
|
|
183
|
+
if null_columns.iloc[index] or inf_columns.iloc[index]:
|
|
179
184
|
key_value = f"""{{
|
|
180
|
-
'' if {val_format} == '' or pd.isna({val_format})
|
|
185
|
+
'' if {val_format} == '' or pd.isna({val_format}) or
|
|
186
|
+
({inf_columns.iloc[index]} and np.isinf({val_format})) else
|
|
181
187
|
f',{key_format}={{str({val_format}).translate(_ESCAPE_STRING)}}'
|
|
182
188
|
}}"""
|
|
183
189
|
else:
|
|
@@ -199,16 +205,17 @@ class DataframeSerializer:
|
|
|
199
205
|
if (issubclass(value.type, np.integer) or issubclass(value.type, np.floating) or
|
|
200
206
|
issubclass(value.type, np.bool_)):
|
|
201
207
|
suffix = 'i' if issubclass(value.type, np.integer) else ''
|
|
202
|
-
if null_columns.iloc[index]:
|
|
208
|
+
if null_columns.iloc[index] or inf_columns.iloc[index]:
|
|
203
209
|
field_value = (
|
|
204
|
-
f"""{{"" if pd.isna({val_format})
|
|
210
|
+
f"""{{"" if pd.isna({val_format}) or ({inf_columns.iloc[index]} and np.isinf({val_format})) else
|
|
211
|
+
f"{sep}{key_format}={{{val_format}}}{suffix}"}}"""
|
|
205
212
|
)
|
|
206
213
|
else:
|
|
207
214
|
field_value = f'{sep}{key_format}={{{val_format}}}{suffix}'
|
|
208
215
|
else:
|
|
209
|
-
if null_columns.iloc[index]:
|
|
216
|
+
if null_columns.iloc[index] or inf_columns.iloc[index]:
|
|
210
217
|
field_value = f"""{{
|
|
211
|
-
'' if pd.isna({val_format}) else
|
|
218
|
+
'' if pd.isna({val_format}) or ({inf_columns.iloc[index]} and np.isinf({val_format})) else
|
|
212
219
|
f'{sep}{key_format}="{{str({val_format}).translate(_ESCAPE_STRING)}}"'
|
|
213
220
|
}}"""
|
|
214
221
|
else:
|
|
@@ -234,11 +241,12 @@ class DataframeSerializer:
|
|
|
234
241
|
'_ESCAPE_STRING': _ESCAPE_STRING,
|
|
235
242
|
'keys': keys,
|
|
236
243
|
'pd': pd,
|
|
244
|
+
'np': np,
|
|
237
245
|
})
|
|
238
246
|
|
|
239
247
|
for k, v in dict(data_frame.dtypes).items():
|
|
240
248
|
if k in data_frame_tag_columns:
|
|
241
|
-
data_frame[k].
|
|
249
|
+
data_frame[k] = data_frame[k].apply(lambda x: np.nan if x == '' else x)
|
|
242
250
|
|
|
243
251
|
self.data_frame = data_frame
|
|
244
252
|
self.f = f
|
|
@@ -284,137 +292,6 @@ class DataframeSerializer:
|
|
|
284
292
|
return self.number_of_chunks
|
|
285
293
|
|
|
286
294
|
|
|
287
|
-
class PolarsDataframeSerializer:
|
|
288
|
-
"""Serialize DataFrame into LineProtocols."""
|
|
289
|
-
|
|
290
|
-
def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, chunk_size: int = None,
|
|
291
|
-
**kwargs) -> None:
|
|
292
|
-
"""
|
|
293
|
-
Init serializer.
|
|
294
|
-
|
|
295
|
-
:param data_frame: Polars DataFrame to serialize
|
|
296
|
-
:param point_settings: Default Tags
|
|
297
|
-
:param precision: The precision for the unix timestamps within the body line-protocol.
|
|
298
|
-
:param chunk_size: The size of chunk for serializing into chunks.
|
|
299
|
-
:key data_frame_measurement_name: name of measurement for writing Polars DataFrame
|
|
300
|
-
:key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
|
|
301
|
-
:key data_frame_timestamp_column: name of DataFrame column which contains a timestamp.
|
|
302
|
-
:key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column
|
|
303
|
-
"""
|
|
304
|
-
|
|
305
|
-
self.data_frame = data_frame
|
|
306
|
-
self.point_settings = point_settings
|
|
307
|
-
self.precision = precision
|
|
308
|
-
self.chunk_size = chunk_size
|
|
309
|
-
self.measurement_name = kwargs.get("data_frame_measurement_name", "measurement")
|
|
310
|
-
self.tag_columns = kwargs.get("data_frame_tag_columns", [])
|
|
311
|
-
self.timestamp_column = kwargs.get("data_frame_timestamp_column", None)
|
|
312
|
-
self.timestamp_timezone = kwargs.get("data_frame_timestamp_timezone", None)
|
|
313
|
-
|
|
314
|
-
self.column_indices = {name: index for index, name in enumerate(data_frame.columns)}
|
|
315
|
-
|
|
316
|
-
if self.timestamp_column is None or self.timestamp_column not in self.column_indices:
|
|
317
|
-
raise ValueError(
|
|
318
|
-
f"Timestamp column {self.timestamp_column} not found in DataFrame. Please define a valid timestamp "
|
|
319
|
-
f"column.")
|
|
320
|
-
|
|
321
|
-
#
|
|
322
|
-
# prepare chunks
|
|
323
|
-
#
|
|
324
|
-
if chunk_size is not None:
|
|
325
|
-
self.number_of_chunks = int(math.ceil(len(data_frame) / float(chunk_size)))
|
|
326
|
-
self.chunk_size = chunk_size
|
|
327
|
-
else:
|
|
328
|
-
self.number_of_chunks = None
|
|
329
|
-
|
|
330
|
-
def escape_key(self, value):
|
|
331
|
-
return str(value).translate(_ESCAPE_KEY)
|
|
332
|
-
|
|
333
|
-
def escape_value(self, value):
|
|
334
|
-
return str(value).translate(_ESCAPE_STRING)
|
|
335
|
-
|
|
336
|
-
def to_line_protocol(self, row):
|
|
337
|
-
# Filter out None or empty values for tags
|
|
338
|
-
tags = ""
|
|
339
|
-
|
|
340
|
-
tags = ",".join(
|
|
341
|
-
f'{self.escape_key(col)}={self.escape_key(row[self.column_indices[col]])}'
|
|
342
|
-
for col in self.tag_columns
|
|
343
|
-
if row[self.column_indices[col]] is not None and row[self.column_indices[col]] != ""
|
|
344
|
-
)
|
|
345
|
-
|
|
346
|
-
if self.point_settings.defaultTags:
|
|
347
|
-
default_tags = ",".join(
|
|
348
|
-
f'{self.escape_key(key)}={self.escape_key(value)}'
|
|
349
|
-
for key, value in self.point_settings.defaultTags.items()
|
|
350
|
-
)
|
|
351
|
-
# Ensure there's a comma between existing tags and default tags if both are present
|
|
352
|
-
if tags and default_tags:
|
|
353
|
-
tags += ","
|
|
354
|
-
tags += default_tags
|
|
355
|
-
|
|
356
|
-
# add escape symbols for special characters to tags
|
|
357
|
-
|
|
358
|
-
fields = ",".join(
|
|
359
|
-
f"{col}=\"{self.escape_value(row[self.column_indices[col]])}\"" if isinstance(row[self.column_indices[col]],
|
|
360
|
-
str)
|
|
361
|
-
else f"{col}={str(row[self.column_indices[col]]).lower()}" if isinstance(row[self.column_indices[col]],
|
|
362
|
-
bool) # Check for bool first
|
|
363
|
-
else f"{col}={row[self.column_indices[col]]}i" if isinstance(row[self.column_indices[col]], int)
|
|
364
|
-
else f"{col}={row[self.column_indices[col]]}"
|
|
365
|
-
for col in self.column_indices
|
|
366
|
-
if col not in self.tag_columns + [self.timestamp_column] and
|
|
367
|
-
row[self.column_indices[col]] is not None and row[self.column_indices[col]] != ""
|
|
368
|
-
)
|
|
369
|
-
|
|
370
|
-
# Access the Unix timestamp
|
|
371
|
-
timestamp = row[self.column_indices[self.timestamp_column]]
|
|
372
|
-
if tags != "":
|
|
373
|
-
line_protocol = f"{self.measurement_name},{tags} {fields} {timestamp}"
|
|
374
|
-
else:
|
|
375
|
-
line_protocol = f"{self.measurement_name} {fields} {timestamp}"
|
|
376
|
-
|
|
377
|
-
return line_protocol
|
|
378
|
-
|
|
379
|
-
def serialize(self, chunk_idx: int = None):
|
|
380
|
-
from ...extras import pl
|
|
381
|
-
|
|
382
|
-
df = self.data_frame
|
|
383
|
-
|
|
384
|
-
# Check if the timestamp column is already an integer
|
|
385
|
-
if df[self.timestamp_column].dtype in [pl.Int32, pl.Int64]:
|
|
386
|
-
# The timestamp column is already an integer, assuming it's in Unix format
|
|
387
|
-
pass
|
|
388
|
-
else:
|
|
389
|
-
# Convert timestamp to Unix timestamp based on specified precision
|
|
390
|
-
if self.precision in [None, 'ns']:
|
|
391
|
-
df = df.with_columns(
|
|
392
|
-
pl.col(self.timestamp_column).dt.epoch(time_unit="ns").alias(self.timestamp_column))
|
|
393
|
-
elif self.precision == 'us':
|
|
394
|
-
df = df.with_columns(
|
|
395
|
-
pl.col(self.timestamp_column).dt.epoch(time_unit="us").alias(self.timestamp_column))
|
|
396
|
-
elif self.precision == 'ms':
|
|
397
|
-
df = df.with_columns(
|
|
398
|
-
pl.col(self.timestamp_column).dt.epoch(time_unit="ms").alias(self.timestamp_column))
|
|
399
|
-
elif self.precision == 's':
|
|
400
|
-
df = df.with_columns(pl.col(self.timestamp_column).dt.epoch(time_unit="s").alias(self.timestamp_column))
|
|
401
|
-
else:
|
|
402
|
-
raise ValueError(f"Unsupported precision: {self.precision}")
|
|
403
|
-
|
|
404
|
-
if chunk_idx is None:
|
|
405
|
-
chunk = df
|
|
406
|
-
else:
|
|
407
|
-
logger.debug("Serialize chunk %s/%s ...", chunk_idx + 1, self.number_of_chunks)
|
|
408
|
-
chunk = df[chunk_idx * self.chunk_size:(chunk_idx + 1) * self.chunk_size]
|
|
409
|
-
|
|
410
|
-
# Apply the UDF to each row
|
|
411
|
-
line_protocol_expr = chunk.apply(self.to_line_protocol, return_dtype=pl.Object)
|
|
412
|
-
|
|
413
|
-
lp = line_protocol_expr['map'].to_list()
|
|
414
|
-
|
|
415
|
-
return lp
|
|
416
|
-
|
|
417
|
-
|
|
418
295
|
def data_frame_to_list_of_points(data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, **kwargs):
|
|
419
296
|
"""
|
|
420
297
|
Serialize DataFrame into LineProtocols.
|
|
@@ -430,20 +307,3 @@ def data_frame_to_list_of_points(data_frame, point_settings, precision=DEFAULT_W
|
|
|
430
307
|
:key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column - ``DataFrame``
|
|
431
308
|
""" # noqa: E501
|
|
432
309
|
return DataframeSerializer(data_frame, point_settings, precision, **kwargs).serialize()
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
def polars_data_frame_to_list_of_points(data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, **kwargs):
|
|
436
|
-
"""
|
|
437
|
-
Serialize DataFrame into LineProtocols.
|
|
438
|
-
|
|
439
|
-
:param data_frame: Pandas DataFrame to serialize
|
|
440
|
-
:param point_settings: Default Tags
|
|
441
|
-
:param precision: The precision for the unix timestamps within the body line-protocol.
|
|
442
|
-
:key data_frame_measurement_name: name of measurement for writing Pandas DataFrame
|
|
443
|
-
:key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
|
|
444
|
-
:key data_frame_timestamp_column: name of DataFrame column which contains a timestamp. The column can be defined as a :class:`~str` value
|
|
445
|
-
formatted as `2018-10-26`, `2018-10-26 12:00`, `2018-10-26 12:00:00-05:00`
|
|
446
|
-
or other formats and types supported by `pandas.to_datetime <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html#pandas.to_datetime>`_ - ``DataFrame``
|
|
447
|
-
:key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column - ``DataFrame``
|
|
448
|
-
""" # noqa: E501
|
|
449
|
-
return PolarsDataframeSerializer(data_frame, point_settings, precision, **kwargs).serialize()
|
influxdb3_python-0.7.0/influxdb_client_3/write_client/client/write/polars_dataframe_serializer.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Functions for serialize Polars DataFrame.
|
|
3
|
+
|
|
4
|
+
Much of the code here is inspired by that in the aioinflux packet found here: https://github.com/gusutabopb/aioinflux
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import math
|
|
9
|
+
|
|
10
|
+
from influxdb_client_3.write_client.client.write.point import _ESCAPE_KEY, _ESCAPE_STRING, DEFAULT_WRITE_PRECISION
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger('influxdb_client.client.write.polars_dataframe_serializer')
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PolarsDataframeSerializer:
|
|
16
|
+
"""Serialize DataFrame into LineProtocols."""
|
|
17
|
+
|
|
18
|
+
def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, chunk_size: int = None,
|
|
19
|
+
**kwargs) -> None:
|
|
20
|
+
"""
|
|
21
|
+
Init serializer.
|
|
22
|
+
|
|
23
|
+
:param data_frame: Polars DataFrame to serialize
|
|
24
|
+
:param point_settings: Default Tags
|
|
25
|
+
:param precision: The precision for the unix timestamps within the body line-protocol.
|
|
26
|
+
:param chunk_size: The size of chunk for serializing into chunks.
|
|
27
|
+
:key data_frame_measurement_name: name of measurement for writing Polars DataFrame
|
|
28
|
+
:key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
|
|
29
|
+
:key data_frame_timestamp_column: name of DataFrame column which contains a timestamp.
|
|
30
|
+
:key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
self.data_frame = data_frame
|
|
34
|
+
self.point_settings = point_settings
|
|
35
|
+
self.precision = precision
|
|
36
|
+
self.chunk_size = chunk_size
|
|
37
|
+
self.measurement_name = kwargs.get("data_frame_measurement_name", "measurement")
|
|
38
|
+
self.tag_columns = kwargs.get("data_frame_tag_columns", [])
|
|
39
|
+
self.timestamp_column = kwargs.get("data_frame_timestamp_column", None)
|
|
40
|
+
self.timestamp_timezone = kwargs.get("data_frame_timestamp_timezone", None)
|
|
41
|
+
|
|
42
|
+
self.column_indices = {name: index for index, name in enumerate(data_frame.columns)}
|
|
43
|
+
|
|
44
|
+
if self.timestamp_column is None or self.timestamp_column not in self.column_indices:
|
|
45
|
+
raise ValueError(
|
|
46
|
+
f"Timestamp column {self.timestamp_column} not found in DataFrame. Please define a valid timestamp "
|
|
47
|
+
f"column.")
|
|
48
|
+
|
|
49
|
+
#
|
|
50
|
+
# prepare chunks
|
|
51
|
+
#
|
|
52
|
+
if chunk_size is not None:
|
|
53
|
+
self.number_of_chunks = int(math.ceil(len(data_frame) / float(chunk_size)))
|
|
54
|
+
self.chunk_size = chunk_size
|
|
55
|
+
else:
|
|
56
|
+
self.number_of_chunks = None
|
|
57
|
+
|
|
58
|
+
def escape_key(self, value):
|
|
59
|
+
return str(value).translate(_ESCAPE_KEY)
|
|
60
|
+
|
|
61
|
+
def escape_value(self, value):
|
|
62
|
+
return str(value).translate(_ESCAPE_STRING)
|
|
63
|
+
|
|
64
|
+
def to_line_protocol(self, row):
|
|
65
|
+
# Filter out None or empty values for tags
|
|
66
|
+
tags = ""
|
|
67
|
+
|
|
68
|
+
tags = ",".join(
|
|
69
|
+
f'{self.escape_key(col)}={self.escape_key(row[self.column_indices[col]])}'
|
|
70
|
+
for col in self.tag_columns
|
|
71
|
+
if row[self.column_indices[col]] is not None and row[self.column_indices[col]] != ""
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
if self.point_settings.defaultTags:
|
|
75
|
+
default_tags = ",".join(
|
|
76
|
+
f'{self.escape_key(key)}={self.escape_key(value)}'
|
|
77
|
+
for key, value in self.point_settings.defaultTags.items()
|
|
78
|
+
)
|
|
79
|
+
# Ensure there's a comma between existing tags and default tags if both are present
|
|
80
|
+
if tags and default_tags:
|
|
81
|
+
tags += ","
|
|
82
|
+
tags += default_tags
|
|
83
|
+
|
|
84
|
+
# add escape symbols for special characters to tags
|
|
85
|
+
|
|
86
|
+
fields = ",".join(
|
|
87
|
+
f"{col}=\"{self.escape_value(row[self.column_indices[col]])}\"" if isinstance(row[self.column_indices[col]],
|
|
88
|
+
str)
|
|
89
|
+
else f"{col}={str(row[self.column_indices[col]]).lower()}" if isinstance(row[self.column_indices[col]],
|
|
90
|
+
bool) # Check for bool first
|
|
91
|
+
else f"{col}={row[self.column_indices[col]]}i" if isinstance(row[self.column_indices[col]], int)
|
|
92
|
+
else f"{col}={row[self.column_indices[col]]}"
|
|
93
|
+
for col in self.column_indices
|
|
94
|
+
if col not in self.tag_columns + [self.timestamp_column] and
|
|
95
|
+
row[self.column_indices[col]] is not None and row[self.column_indices[col]] != ""
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Access the Unix timestamp
|
|
99
|
+
timestamp = row[self.column_indices[self.timestamp_column]]
|
|
100
|
+
if tags != "":
|
|
101
|
+
line_protocol = f"{self.measurement_name},{tags} {fields} {timestamp}"
|
|
102
|
+
else:
|
|
103
|
+
line_protocol = f"{self.measurement_name} {fields} {timestamp}"
|
|
104
|
+
|
|
105
|
+
return line_protocol
|
|
106
|
+
|
|
107
|
+
def serialize(self, chunk_idx: int = None):
|
|
108
|
+
import polars as pl
|
|
109
|
+
|
|
110
|
+
df = self.data_frame
|
|
111
|
+
|
|
112
|
+
# Check if the timestamp column is already an integer
|
|
113
|
+
if df[self.timestamp_column].dtype in [pl.Int32, pl.Int64]:
|
|
114
|
+
# The timestamp column is already an integer, assuming it's in Unix format
|
|
115
|
+
pass
|
|
116
|
+
else:
|
|
117
|
+
# Convert timestamp to Unix timestamp based on specified precision
|
|
118
|
+
if self.precision in [None, 'ns']:
|
|
119
|
+
df = df.with_columns(
|
|
120
|
+
pl.col(self.timestamp_column).dt.epoch(time_unit="ns").alias(self.timestamp_column))
|
|
121
|
+
elif self.precision == 'us':
|
|
122
|
+
df = df.with_columns(
|
|
123
|
+
pl.col(self.timestamp_column).dt.epoch(time_unit="us").alias(self.timestamp_column))
|
|
124
|
+
elif self.precision == 'ms':
|
|
125
|
+
df = df.with_columns(
|
|
126
|
+
pl.col(self.timestamp_column).dt.epoch(time_unit="ms").alias(self.timestamp_column))
|
|
127
|
+
elif self.precision == 's':
|
|
128
|
+
df = df.with_columns(pl.col(self.timestamp_column).dt.epoch(time_unit="s").alias(self.timestamp_column))
|
|
129
|
+
else:
|
|
130
|
+
raise ValueError(f"Unsupported precision: {self.precision}")
|
|
131
|
+
|
|
132
|
+
if chunk_idx is None:
|
|
133
|
+
chunk = df
|
|
134
|
+
else:
|
|
135
|
+
logger.debug("Serialize chunk %s/%s ...", chunk_idx + 1, self.number_of_chunks)
|
|
136
|
+
chunk = df[chunk_idx * self.chunk_size:(chunk_idx + 1) * self.chunk_size]
|
|
137
|
+
|
|
138
|
+
# Apply the UDF to each row
|
|
139
|
+
line_protocol_expr = chunk.map_rows(self.to_line_protocol, return_dtype=pl.Object)
|
|
140
|
+
|
|
141
|
+
lp = line_protocol_expr['map'].to_list()
|
|
142
|
+
|
|
143
|
+
return lp
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def polars_data_frame_to_list_of_points(data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION, **kwargs):
|
|
147
|
+
"""
|
|
148
|
+
Serialize DataFrame into LineProtocols.
|
|
149
|
+
|
|
150
|
+
:param data_frame: Pandas DataFrame to serialize
|
|
151
|
+
:param point_settings: Default Tags
|
|
152
|
+
:param precision: The precision for the unix timestamps within the body line-protocol.
|
|
153
|
+
:key data_frame_measurement_name: name of measurement for writing Pandas DataFrame
|
|
154
|
+
:key data_frame_tag_columns: list of DataFrame columns which are tags, rest columns will be fields
|
|
155
|
+
:key data_frame_timestamp_column: name of DataFrame column which contains a timestamp. The column can be defined as a :class:`~str` value
|
|
156
|
+
formatted as `2018-10-26`, `2018-10-26 12:00`, `2018-10-26 12:00:00-05:00`
|
|
157
|
+
or other formats and types supported by `pandas.to_datetime <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html#pandas.to_datetime>`_ - ``DataFrame``
|
|
158
|
+
:key data_frame_timestamp_timezone: name of the timezone which is used for timestamp column - ``DataFrame``
|
|
159
|
+
""" # noqa: E501
|
|
160
|
+
return PolarsDataframeSerializer(data_frame, point_settings, precision, **kwargs).serialize()
|
{influxdb3_python-0.6.1 → influxdb3_python-0.7.0}/influxdb_client_3/write_client/client/write_api.py
RENAMED
|
@@ -19,8 +19,7 @@ from reactivex.subject import Subject
|
|
|
19
19
|
from influxdb_client_3.write_client.domain import WritePrecision
|
|
20
20
|
from influxdb_client_3.write_client.client._base import _BaseWriteApi, _HAS_DATACLASS
|
|
21
21
|
from influxdb_client_3.write_client.client.util.helpers import get_org_query_param
|
|
22
|
-
from influxdb_client_3.write_client.client.write.dataframe_serializer import
|
|
23
|
-
PolarsDataframeSerializer)
|
|
22
|
+
from influxdb_client_3.write_client.client.write.dataframe_serializer import DataframeSerializer
|
|
24
23
|
from influxdb_client_3.write_client.client.write.point import Point, DEFAULT_WRITE_PRECISION
|
|
25
24
|
from influxdb_client_3.write_client.client.write.retry import WritesRetry
|
|
26
25
|
from influxdb_client_3.write_client.rest import _UTF_8_encoding
|
|
@@ -462,6 +461,7 @@ You can use native asynchronous version of the client:
|
|
|
462
461
|
precision, **kwargs)
|
|
463
462
|
|
|
464
463
|
elif 'polars' in str(type(data)):
|
|
464
|
+
from influxdb_client_3.write_client.client.write.dataframe_serializer import PolarsDataframeSerializer
|
|
465
465
|
serializer = PolarsDataframeSerializer(data,
|
|
466
466
|
self._point_settings, precision,
|
|
467
467
|
self._write_options.batch_size, **kwargs)
|
|
@@ -10,9 +10,4 @@ try:
|
|
|
10
10
|
except ModuleNotFoundError as err:
|
|
11
11
|
raise ImportError(f"`data_frame` requires numpy which couldn't be imported due: {err}")
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
import polars as pl
|
|
15
|
-
except ModuleNotFoundError as err:
|
|
16
|
-
raise ImportError(f"`polars_frame` requires polars which couldn't be imported due: {err}")
|
|
17
|
-
|
|
18
|
-
__all__ = ['pd', 'np', 'pl']
|
|
13
|
+
__all__ = ['pd', 'np']
|