quasardb-3.14.2.dev7-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of quasardb might be problematic.

Files changed (69)
  1. quasardb/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
  2. quasardb/CMakeFiles/progress.marks +1 -0
  3. quasardb/Makefile +189 -0
  4. quasardb/__init__.py +140 -0
  5. quasardb/__init__.pyi +72 -0
  6. quasardb/cmake_install.cmake +58 -0
  7. quasardb/date/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
  8. quasardb/date/CMakeFiles/Export/b76006b2b7125baf1b0b4d4ca4db82bd/dateTargets.cmake +108 -0
  9. quasardb/date/CMakeFiles/progress.marks +1 -0
  10. quasardb/date/Makefile +189 -0
  11. quasardb/date/cmake_install.cmake +81 -0
  12. quasardb/date/dateConfigVersion.cmake +65 -0
  13. quasardb/date/dateTargets.cmake +63 -0
  14. quasardb/extensions/__init__.py +8 -0
  15. quasardb/extensions/writer.py +191 -0
  16. quasardb/firehose.py +103 -0
  17. quasardb/libqdb_api.so +0 -0
  18. quasardb/numpy/__init__.py +1045 -0
  19. quasardb/pandas/__init__.py +533 -0
  20. quasardb/pool.py +311 -0
  21. quasardb/pybind11/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
  22. quasardb/pybind11/CMakeFiles/progress.marks +1 -0
  23. quasardb/pybind11/Makefile +189 -0
  24. quasardb/pybind11/cmake_install.cmake +50 -0
  25. quasardb/quasardb/__init__.pyi +97 -0
  26. quasardb/quasardb/_batch_column.pyi +5 -0
  27. quasardb/quasardb/_batch_inserter.pyi +30 -0
  28. quasardb/quasardb/_blob.pyi +16 -0
  29. quasardb/quasardb/_cluster.pyi +100 -0
  30. quasardb/quasardb/_continuous.pyi +16 -0
  31. quasardb/quasardb/_double.pyi +7 -0
  32. quasardb/quasardb/_entry.pyi +60 -0
  33. quasardb/quasardb/_error.pyi +15 -0
  34. quasardb/quasardb/_integer.pyi +7 -0
  35. quasardb/quasardb/_node.pyi +26 -0
  36. quasardb/quasardb/_options.pyi +105 -0
  37. quasardb/quasardb/_perf.pyi +5 -0
  38. quasardb/quasardb/_properties.pyi +5 -0
  39. quasardb/quasardb/_query.pyi +2 -0
  40. quasardb/quasardb/_reader.pyi +9 -0
  41. quasardb/quasardb/_retry.pyi +16 -0
  42. quasardb/quasardb/_string.pyi +12 -0
  43. quasardb/quasardb/_table.pyi +125 -0
  44. quasardb/quasardb/_tag.pyi +5 -0
  45. quasardb/quasardb/_timestamp.pyi +9 -0
  46. quasardb/quasardb/_writer.pyi +111 -0
  47. quasardb/quasardb/metrics/__init__.pyi +20 -0
  48. quasardb/quasardb.cpython-310-aarch64-linux-gnu.so +0 -0
  49. quasardb/range-v3/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
  50. quasardb/range-v3/CMakeFiles/Export/48a02d54b5e9e60c30c5f249b431a911/range-v3-targets.cmake +128 -0
  51. quasardb/range-v3/CMakeFiles/progress.marks +1 -0
  52. quasardb/range-v3/CMakeFiles/range.v3.headers.dir/DependInfo.cmake +22 -0
  53. quasardb/range-v3/CMakeFiles/range.v3.headers.dir/build.make +86 -0
  54. quasardb/range-v3/CMakeFiles/range.v3.headers.dir/cmake_clean.cmake +5 -0
  55. quasardb/range-v3/CMakeFiles/range.v3.headers.dir/compiler_depend.make +2 -0
  56. quasardb/range-v3/CMakeFiles/range.v3.headers.dir/compiler_depend.ts +2 -0
  57. quasardb/range-v3/CMakeFiles/range.v3.headers.dir/progress.make +1 -0
  58. quasardb/range-v3/Makefile +204 -0
  59. quasardb/range-v3/cmake_install.cmake +93 -0
  60. quasardb/range-v3/include/range/v3/version.hpp +24 -0
  61. quasardb/range-v3/range-v3-config-version.cmake +83 -0
  62. quasardb/range-v3/range-v3-config.cmake +80 -0
  63. quasardb/stats.py +358 -0
  64. quasardb/table_cache.py +56 -0
  65. quasardb-3.14.2.dev7.dist-info/METADATA +41 -0
  66. quasardb-3.14.2.dev7.dist-info/RECORD +69 -0
  67. quasardb-3.14.2.dev7.dist-info/WHEEL +6 -0
  68. quasardb-3.14.2.dev7.dist-info/licenses/LICENSE.md +11 -0
  69. quasardb-3.14.2.dev7.dist-info/top_level.txt +1 -0
quasardb/pandas/__init__.py
@@ -0,0 +1,533 @@
+ # pylint: disable=C0103,C0111,C0302,R0903
+
+ # Copyright (c) 2009-2024, quasardb SAS. All rights reserved.
+ # All rights reserved.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are met:
+ #
+ #    * Redistributions of source code must retain the above copyright
+ #      notice, this list of conditions and the following disclaimer.
+ #    * Redistributions in binary form must reproduce the above copyright
+ #      notice, this list of conditions and the following disclaimer in the
+ #      documentation and/or other materials provided with the distribution.
+ #    * Neither the name of quasardb nor the names of its contributors may
+ #      be used to endorse or promote products derived from this software
+ #      without specific prior written permission.
+ #
+ # THIS SOFTWARE IS PROVIDED BY QUASARDB AND CONTRIBUTORS ``AS IS'' AND ANY
+ # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ # DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ #
+
+ import logging
+ import warnings
+ from datetime import datetime
+ from functools import partial
+
+ import quasardb
+ import quasardb.table_cache as table_cache
+ import quasardb.numpy as qdbnp
+
+
+ logger = logging.getLogger("quasardb.pandas")
+
+
+ class PandasRequired(ImportError):
+     """
+     Exception raised when trying to use the QuasarDB pandas integration, but
+     pandas has not been installed.
+     """
+
+     pass
+
+
+ try:
+     import numpy as np
+     import numpy.ma as ma
+     import pandas as pd
+     from pandas.core.api import DataFrame, Series
+     from pandas.core.base import PandasObject
+
+ except ImportError:
+     raise PandasRequired(
+         "The pandas library is required to handle pandas data formats"
+     )
+
+
+ # Constant mapping of numpy dtype to QuasarDB column type
+ # TODO(leon): support this natively in the qdb C API? We have everything we
+ # need to understand dtypes.
+ _dtype_map = {
+     np.dtype("int64"): quasardb.ColumnType.Int64,
+     np.dtype("int32"): quasardb.ColumnType.Int64,
+     np.dtype("float64"): quasardb.ColumnType.Double,
+     np.dtype("object"): quasardb.ColumnType.String,
+     # Note: np.dtype("M8[ns]") is an alias for np.dtype("datetime64[ns]"),
+     # so a single entry covers both spellings.
+     np.dtype("datetime64[ns]"): quasardb.ColumnType.Timestamp,
+     "int64": quasardb.ColumnType.Int64,
+     "int32": quasardb.ColumnType.Int64,
+     "float32": quasardb.ColumnType.Double,
+     "float64": quasardb.ColumnType.Double,
+     "timestamp": quasardb.ColumnType.Timestamp,
+     "string": quasardb.ColumnType.String,
+     "bytes": quasardb.ColumnType.Blob,
+     "floating": quasardb.ColumnType.Double,
+     "integer": quasardb.ColumnType.Int64,
+     "datetime64": quasardb.ColumnType.Timestamp,
+ }
+
+
+ def read_series(table, col_name, ranges=None):
+     """
+     Read a Pandas Series from a single column.
+
+     Parameters:
+     -----------
+
+     table : quasardb.Timeseries
+         QuasarDB Timeseries table object, e.g. qdb_cluster.table('my_table')
+
+     col_name : str
+         Name of the column to read.
+
+     ranges : list
+         A list of ranges to read, represented as tuples of Numpy datetime64[ns] objects.
+     """
+     read_with = {
+         quasardb.ColumnType.Double: table.double_get_ranges,
+         quasardb.ColumnType.Blob: table.blob_get_ranges,
+         quasardb.ColumnType.String: table.string_get_ranges,
+         quasardb.ColumnType.Int64: table.int64_get_ranges,
+         quasardb.ColumnType.Timestamp: table.timestamp_get_ranges,
+         quasardb.ColumnType.Symbol: table.string_get_ranges,
+     }
+
+     kwargs = {"column": col_name}
+
+     if ranges is not None:
+         kwargs["ranges"] = ranges
+
+     # Dispatch based on column type
+     t = table.column_type_by_id(col_name)
+
+     logger.info(
+         "reading Series from column %s.%s with type %s", table.get_name(), col_name, t
+     )
+
+     res = read_with[t](**kwargs)
+
+     return Series(res[1], index=res[0])
+
+
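For illustration, a minimal usage sketch for read_series; the URI, table name, and column name below are hypothetical, and a double column named 'close' is assumed to exist:

    import numpy as np
    import quasardb
    import quasardb.pandas as qdbpd

    conn = quasardb.Cluster("qdb://127.0.0.1:2836")
    table = conn.table("my_table")

    # Read the entire column into a Series
    s = qdbpd.read_series(table, "close")

    # Or restrict the read to a single time range
    rng = (np.datetime64("2024-01-01", "ns"), np.datetime64("2024-02-01", "ns"))
    s = qdbpd.read_series(table, "close", ranges=[rng])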
+ def write_series(series, table, col_name, infer_types=True, dtype=None):
+     """
+     Write a Pandas Series to a single column.
+
+     Parameters:
+     -----------
+
+     series : pandas.Series
+         Pandas Series, with a numpy.datetime64[ns] index. An attempt is made to
+         convert the underlying data to the appropriate QuasarDB type.
+
+     table : quasardb.Timeseries
+         QuasarDB Timeseries table object, e.g. qdb_cluster.table('my_table')
+
+     col_name : str
+         Column name to store data in.
+     """
+
+     logger.debug(
+         "write_series, table=%s, col_name=%s, infer_types=%s, dtype=%s",
+         table.get_name(),
+         col_name,
+         infer_types,
+         dtype,
+     )
+
+     data = ma.masked_array(series.to_numpy(copy=False), mask=series.isna())
+
+     if infer_types is True:
+         index = series.index.to_numpy("datetime64[ns]", copy=False)
+     else:
+         index = series.index.to_numpy(copy=False)
+
+     return qdbnp.write_array(
+         data=data,
+         index=index,
+         table=table,
+         column=col_name,
+         dtype=dtype,
+         infer_types=infer_types,
+     )
+
+
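A complementary sketch for write_series, reusing `table` from the previous example; note that write_series assumes the table and column already exist:

    import pandas as pd

    idx = pd.date_range("2024-01-01", periods=3, freq="min")
    s = pd.Series([1.5, 2.5, 3.5], index=idx)

    qdbpd.write_series(s, table, "close")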
+ def query(
+     cluster: quasardb.Cluster,
+     query: str,
+     index: str = None,
+     blobs: bool = False,
+     numpy: bool = True,
+ ):
+     """
+     Execute *query* and return the result as a pandas DataFrame.
+
+     Parameters
+     ----------
+     cluster : quasardb.Cluster
+         Active connection to the QuasarDB cluster.
+
+     query : str
+         The query to execute.
+
+     index : str | None, default None
+         Column to use as index. When None, a synthetic index is created and
+         named "$index".
+
+     blobs, numpy
+         DEPRECATED: no longer used. Supplying a non-default value emits a
+         DeprecationWarning and the argument is ignored.
+     """
+     # ------------------------------------------------------------ deprecations
+     if blobs is not False:
+         warnings.warn(
+             "`blobs` is deprecated and will be removed in a future version; "
+             "the argument is ignored.",
+             DeprecationWarning,
+             stacklevel=2,
+         )
+     if numpy is not True:
+         warnings.warn(
+             "`numpy` is deprecated and will be removed in a future version; "
+             "the argument is ignored.",
+             DeprecationWarning,
+             stacklevel=2,
+         )
+     # --------------------------------------------------------------------------
+
+     logger.debug("querying and returning as DataFrame: %s", query)
+     index_vals, m = qdbnp.query(cluster, query, index=index, dict=True)
+
+     index_name = "$index" if index is None else index
+     index_obj = pd.Index(index_vals, name=index_name)
+
+     return pd.DataFrame(m, index=index_obj)
+
+
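For example, reusing the hypothetical `conn` from above, a query result comes back as a DataFrame; passing `index` promotes a result column to the index instead of the synthetic "$index":

    df = qdbpd.query(conn, "SELECT * FROM my_table")
    df = qdbpd.query(conn, "SELECT * FROM my_table", index="$timestamp")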
+ def stream_dataframes(
+     conn: quasardb.Cluster,
+     tables: list,
+     *,
+     batch_size: int = 2**16,
+     column_names: list = None,
+     ranges: list = None,
+ ):
+     """
+     Read Pandas DataFrames from QuasarDB Timeseries tables. Returns a generator
+     of dataframes with at most `batch_size` rows each, which is useful when
+     traversing a large dataset that does not fit into memory.
+
+     Parameters:
+     -----------
+
+     conn : quasardb.Cluster
+         Connection to the QuasarDB database.
+
+     tables : list[str | quasardb.Table]
+         QuasarDB tables to stream, as a list of strings or quasardb table objects.
+
+     batch_size : int
+         The number of rows to fetch in a single read operation. If unset, uses
+         2^16 (65536) rows as batch size by default.
+
+     column_names : optional list
+         List of columns to read into the dataframe. The timestamp column
+         '$timestamp' is always read.
+
+         Defaults to all columns.
+
+     ranges : optional list
+         A list of time ranges to read, represented as tuples of Numpy
+         datetime64[ns] objects. Defaults to the entire table.
+     """
+     # Sanitize batch_size
+     if batch_size is None:
+         batch_size = 2**16
+     elif not isinstance(batch_size, int):
+         raise TypeError(
+             "batch_size should be an integer, but got: {} with value {}".format(
+                 type(batch_size), str(batch_size)
+             )
+         )
+
+     kwargs = {"batch_size": batch_size}
+
+     if column_names:
+         kwargs["column_names"] = column_names
+
+     if ranges:
+         kwargs["ranges"] = ranges
+
+     kwargs["table_names"] = [x if isinstance(x, str) else x.get_name() for x in tables]
+
+     with conn.reader(**kwargs) as reader:
+         for batch in reader:
+             # We always expect the timestamp column, and set it as the index
+             assert "$timestamp" in batch
+
+             idx = pd.Index(batch.pop("$timestamp"), copy=False, name="$timestamp")
+             df = pd.DataFrame(batch, index=idx)
+
+             yield df
+
+
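A usage sketch: because this returns a generator, each batch can be processed and discarded, keeping memory bounded even for very large tables (table names are hypothetical):

    total_rows = 0
    for df in qdbpd.stream_dataframes(conn, ["table_a", "table_b"], batch_size=10000):
        # Each df is indexed by $timestamp and includes a $table column
        # identifying the source table.
        total_rows += len(df)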
+ def stream_dataframe(conn: quasardb.Cluster, table, **kwargs):
+     """
+     Read a single table and return a stream of dataframes. This is a convenience
+     function that wraps around `stream_dataframes` and accepts the same
+     parameters.
+     """
+     kwargs["tables"] = [table]
+
+     # For backwards compatibility, we drop the `$table` column from the results:
+     # this is not strictly necessary, but it is reasonable when reading from a
+     # single table, which is the case here.
+     return (df.drop(columns=["$table"]) for df in stream_dataframes(conn, **kwargs))
+
+
+ def read_dataframe(conn: quasardb.Cluster, table, **kwargs):
+     """
+     Read a Pandas Dataframe from a QuasarDB Timeseries table. Wraps around
+     stream_dataframe(), and returns everything as a single dataframe. Any
+     user-provided batch_size is overridden.
+
+     Parameters:
+     -----------
+
+     conn : quasardb.Cluster
+         Connection to the QuasarDB database.
+
+     table : str | quasardb.Table
+         QuasarDB table to stream, either as a string or a table object. When
+         re-executing the same function multiple times on the same tables,
+         providing the table as an object has a performance benefit.
+     """
+
+     if (
+         "batch_size" in kwargs
+         and kwargs["batch_size"] != 0
+         and kwargs["batch_size"] is not None
+     ):
+         logger.warning(
+             "Providing a batch size with read_dataframe is unsupported, overriding batch_size to 65536."
+         )
+         logger.warning(
+             "If you wish to traverse the data in smaller batches, please use: stream_dataframe()."
+         )
+         kwargs["batch_size"] = 2**16
+
+     # Note that this is *lazy*: dfs is a generator, not a list. As such,
+     # dataframes will be fetched on demand, which means that an error could
+     # occur in the middle of processing dataframes.
+     dfs = stream_dataframe(conn, table, **kwargs)
+
+     # If the result of stream_dataframe is empty, pd.concat() raises a
+     # ValueError. As stream_dataframe is a generator, there is no easy way to
+     # check for this condition without evaluating it; the simplest approach is
+     # to catch the ValueError and return an empty DataFrame.
+     try:
+         return pd.concat(dfs, copy=False)
+     except ValueError as e:
+         logger.error(
+             "Error while concatenating dataframes. This can happen if the result set is empty. Returning empty dataframe. Error: %s",
+             e,
+         )
+         return pd.DataFrame()
+
+
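For example, loading a (small) table into a single DataFrame, optionally restricted to a time range; names are hypothetical:

    df = qdbpd.read_dataframe(conn, "my_table")

    rng = (np.datetime64("2024-01-01", "ns"), np.datetime64("2024-02-01", "ns"))
    df = qdbpd.read_dataframe(conn, "my_table", ranges=[rng])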
+ def _extract_columns(df, cinfos):
+     """
+     Converts a dataframe to a dict of masked numpy arrays, one for each column.
+
+     Arrays are keyed by column name, visited in the same order as the table's
+     columns. Table columns not present in the dataframe, and dataframe columns
+     not present in the table, are omitted.
+     """
+     ret = {}
+
+     # Grab all columns from the DataFrame in the order of the table's columns,
+     # skipping table columns that are not present in the df.
+     for (cname, ctype) in cinfos:
+         if cname in df.columns:
+             arr = df[cname].array
+             ret[cname] = ma.masked_array(arr.to_numpy(copy=False), mask=arr.isna())
+
+     return ret
+
+
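To illustrate the representation this helper produces: a missing value becomes a masked entry rather than forcing a dtype change, e.g.:

    s = pd.Series([1.0, float("nan"), 3.0])
    masked = ma.masked_array(s.to_numpy(copy=False), mask=s.isna())
    # masked.mask == [False, True, False]; masked.dtype stays float64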
+ def write_dataframes(dfs, cluster, *, create=False, shard_size=None, **kwargs):
+     """
+     Store dataframes into tables. Any additional parameters not documented here
+     are passed to numpy.write_arrays(). Please consult the pydoc of that
+     function for additional accepted parameters.
+
+     Parameters:
+     -----------
+
+     dfs : dict[str | quasardb.Table, pd.DataFrame] | list[tuple[str | quasardb.Table, pd.DataFrame]]
+         Either a dict that maps tables (objects or names) to dataframes, or a
+         list of table<>dataframe tuples.
+
+     cluster : quasardb.Cluster
+         Active connection to the QuasarDB cluster.
+
+     create : optional bool
+         Whether to create the table. Defaults to False.
+
+     shard_size : optional datetime.timedelta
+         The shard size of the timeseries you wish to create when `create` is True.
+     """
+
+     # If dfs is a dict, we convert it to a list of tuples.
+     if isinstance(dfs, dict):
+         dfs = dfs.items()
+
+     if shard_size is not None and create is False:
+         raise ValueError("Invalid argument: shard size provided while create is False")
+
+     # If the tables are provided as strings, we look them up.
+     dfs_ = []
+     for table, df in dfs:
+         if isinstance(table, str):
+             table = table_cache.lookup(table, cluster)
+
+         dfs_.append((table, df))
+
+     data_by_table = []
+
+     for table, df in dfs_:
+         logger.debug("quasardb.pandas.write_dataframes, create = %s", create)
+         assert isinstance(df, pd.DataFrame)
+
+         # Create table if requested
+         if create:
+             _create_table_from_df(df, table, shard_size)
+
+         cinfos = [(x.name, x.type) for x in table.list_columns()]
+
+         if not df.index.is_monotonic_increasing:
+             logger.warning(
+                 "dataframe index is unsorted, resorting dataframe based on index"
+             )
+             df = df.sort_index()
+
+         # We pass everything else to our qdbnp.write_arrays function, as it is
+         # generally (much) more sensible to deal with numpy arrays than with
+         # Pandas dataframes: pandas has the bad habit of casting data to
+         # different types if your data is sparse, most notably forcing sparse
+         # integer arrays to floating point.
+
+         data = _extract_columns(df, cinfos)
+         data["$timestamp"] = df.index.to_numpy(copy=False, dtype="datetime64[ns]")
+
+         data_by_table.append((table, data))
+
+     kwargs["deprecation_stacklevel"] = kwargs.get("deprecation_stacklevel", 1) + 1
+     return qdbnp.write_arrays(data_by_table, cluster, table=None, index=None, **kwargs)
+
+
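A sketch of writing two dataframes in one call; `df_a` and `df_b` are hypothetical dataframes with datetime64[ns] indexes, and the tables are assumed to already exist:

    qdbpd.write_dataframes({"table_a": df_a, "table_b": df_b}, conn)

    # Equivalent list-of-tuples form:
    qdbpd.write_dataframes([("table_a", df_a), ("table_b", df_b)], conn)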
+ def write_dataframe(df, cluster, table, **kwargs):
+     """
+     Store a single dataframe into a table. Takes the same arguments as
+     `write_dataframes`, except it accepts only a single df/table combination.
+     """
+     kwargs["deprecation_stacklevel"] = kwargs.get("deprecation_stacklevel", 1) + 1
+     write_dataframes([(table, df)], cluster, **kwargs)
+
+
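The single-table convenience form, with a hypothetical table name:

    qdbpd.write_dataframe(df_a, conn, "table_a")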
+ def write_pinned_dataframe(*args, **kwargs):
+     """
+     Legacy wrapper around write_dataframe().
+     """
+     logger.warning(
+         "write_pinned_dataframe is deprecated and will be removed in a future release."
+     )
+     logger.warning("Please use write_dataframe directly instead.")
+     kwargs["deprecation_stacklevel"] = 2
+     return write_dataframe(*args, **kwargs)
+
+
+ def _create_table_from_df(df, table, shard_size=None):
+     cols = []
+
+     dtypes = _get_inferred_dtypes(df)
+
+     logger.info("got inferred dtypes: %s", dtypes)
+     for c in df.columns:
+         dt = dtypes[c]
+         ct = _dtype_to_column_type(df[c].dtype, dt)
+         logger.debug(
+             "probed pandas dtype %s to inferred dtype %s and mapped to quasardb column type %s",
+             df[c].dtype,
+             dt,
+             ct,
+         )
+         cols.append(quasardb.ColumnInfo(ct, c))
+
+     try:
+         if not shard_size:
+             table.create(cols)
+         else:
+             table.create(cols, shard_size)
+     except quasardb.quasardb.AliasAlreadyExistsError:
+         # The table already exists, which is acceptable here.
+         # TODO(leon): warn? how?
+         pass
+
+     return table
+
+
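This helper backs the `create=True` path of the writer functions above; a sketch of the user-facing call, with a hypothetical table name and shard size:

    from datetime import timedelta

    qdbpd.write_dataframe(
        df_a, conn, "new_table", create=True, shard_size=timedelta(hours=1)
    )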
+ def _dtype_to_column_type(dt, inferred):
+     res = _dtype_map.get(inferred, None)
+     if res is None:
+         res = _dtype_map.get(dt, None)
+
+     if res is None:
+         raise ValueError("Incompatible data type: {}".format(dt))
+
+     return res
+
+
+ def _get_inferred_dtypes(df):
+     dtypes = dict()
+     for c in df.columns:
+         dt = pd.api.types.infer_dtype(df[c].values)
+         logger.debug("Determined dtype of column %s to be %s", c, dt)
+         dtypes[c] = dt
+     return dtypes
+
+
+ def _get_inferred_dtypes_indexed(df):
+     dtypes = _get_inferred_dtypes(df)
+     # Performance improvement: avoid expensive dict lookups by indexing
+     # the column types by relative offset within the df.
+     return list(dtypes[c] for c in df.columns)
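To see how type inference feeds the dtype map, a quick sketch of the lookups these helpers perform:

    s = pd.Series([1, 2, 3])
    pd.api.types.infer_dtype(s.values)      # -> "integer"
    _dtype_map["integer"]                   # -> quasardb.ColumnType.Int64
    _dtype_map[np.dtype("float64")]         # -> quasardb.ColumnType.Double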