quasardb 3.14.2.dev4-cp39-cp39-win_amd64.whl → 3.14.2.dev6-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (31)
  1. quasardb/INSTALL.vcxproj +5 -5
  2. quasardb/__init__.py +21 -7
  3. quasardb/date/ALL_BUILD.vcxproj +9 -9
  4. quasardb/date/CMakeFiles/Export/df49adab93b9e0c10c64f72458b31971/dateTargets.cmake +1 -1
  5. quasardb/date/CMakeFiles/generate.stamp.depend +4 -4
  6. quasardb/date/INSTALL.vcxproj +5 -5
  7. quasardb/date/dateTargets.cmake +1 -1
  8. quasardb/extensions/writer.py +59 -61
  9. quasardb/firehose.py +24 -22
  10. quasardb/numpy/__init__.py +262 -128
  11. quasardb/pandas/__init__.py +145 -91
  12. quasardb/pool.py +13 -2
  13. quasardb/pybind11/ALL_BUILD.vcxproj +9 -9
  14. quasardb/pybind11/CMakeFiles/generate.stamp.depend +14 -14
  15. quasardb/pybind11/INSTALL.vcxproj +5 -5
  16. quasardb/qdb_api.dll +0 -0
  17. quasardb/quasardb.cp39-win_amd64.pyd +0 -0
  18. quasardb/range-v3/ALL_BUILD.vcxproj +9 -9
  19. quasardb/range-v3/CMakeFiles/Export/d94ef200eca10a819b5858b33e808f5b/range-v3-targets.cmake +1 -1
  20. quasardb/range-v3/CMakeFiles/generate.stamp.depend +11 -11
  21. quasardb/range-v3/INSTALL.vcxproj +5 -5
  22. quasardb/range-v3/range-v3-config.cmake +1 -1
  23. quasardb/range-v3/range.v3.headers.vcxproj +9 -9
  24. quasardb/stats.py +245 -120
  25. quasardb/table_cache.py +5 -1
  26. {quasardb-3.14.2.dev4.dist-info → quasardb-3.14.2.dev6.dist-info}/METADATA +3 -2
  27. quasardb-3.14.2.dev6.dist-info/RECORD +54 -0
  28. {quasardb-3.14.2.dev4.dist-info → quasardb-3.14.2.dev6.dist-info}/WHEEL +1 -1
  29. quasardb-3.14.2.dev4.dist-info/RECORD +0 -54
  30. {quasardb-3.14.2.dev4.dist-info → quasardb-3.14.2.dev6.dist-info/licenses}/LICENSE.md +0 -0
  31. {quasardb-3.14.2.dev4.dist-info → quasardb-3.14.2.dev6.dist-info}/top_level.txt +0 -0
@@ -29,11 +29,12 @@
 
  import logging
  import time
+ import warnings
 
  import quasardb
  import quasardb.table_cache as table_cache
 
- logger = logging.getLogger('quasardb.numpy')
+ logger = logging.getLogger("quasardb.numpy")
 
 
  class NumpyRequired(ImportError):
@@ -41,6 +42,7 @@ class NumpyRequired(ImportError):
  Exception raised when trying to use QuasarDB pandas integration, but
  pandas has not been installed.
  """
+
  pass
 
 
@@ -52,7 +54,7 @@ except ImportError as err:
  logger.exception(err)
  raise NumpyRequired(
  "The numpy library is required to handle numpy arrays formats"
- ) from err
+ ) from err
 
 
  class IncompatibleDtypeError(TypeError):
@@ -68,13 +70,16 @@ class IncompatibleDtypeError(TypeError):
  super().__init__(self.msg())
 
  def msg(self):
- return "Data for column '{}' with type '{}' was provided in dtype '{}' but need '{}'.".format(self.cname, self.ctype, self.provided, self.expected)
+ return "Data for column '{}' with type '{}' was provided in dtype '{}' but need '{}'.".format(
+ self.cname, self.ctype, self.provided, self.expected
+ )
 
 
  class IncompatibleDtypeErrors(TypeError):
  """
  Wraps multiple dtype errors
  """
+
  def __init__(self, xs):
  self.xs = xs
  super().__init__(self.msg())
@@ -82,29 +87,33 @@ class IncompatibleDtypeErrors(TypeError):
  def msg(self):
  return "\n".join(x.msg() for x in self.xs)
 
+
  class InvalidDataCardinalityError(ValueError):
  """
  Raised when the provided data arrays doesn't match the table's columns.
  """
+
  def __init__(self, data, cinfos):
  self.data = data
  self.cinfos = cinfos
  super().__init__(self.msg())
 
  def msg(self):
- return "Provided data array length '{}' exceeds amount of table columns '{}', unable to map data to columns".format(len(self.data), len(self.cinfos))
+ return "Provided data array length '{}' exceeds amount of table columns '{}', unable to map data to columns".format(
+ len(self.data), len(self.cinfos)
+ )
 
 
  # Based on QuasarDB column types, which dtype do we accept?
  # First entry will always be the 'preferred' dtype, other ones
  # those that we can natively convert in native code.
  _ctype_to_dtype = {
- quasardb.ColumnType.String: [np.dtype('U')],
- quasardb.ColumnType.Symbol: [np.dtype('U')],
- quasardb.ColumnType.Int64: [np.dtype('i8'), np.dtype('i4'), np.dtype('i2')],
- quasardb.ColumnType.Double: [np.dtype('f8'), np.dtype('f4')],
- quasardb.ColumnType.Blob: [np.dtype('S'), np.dtype('O')],
- quasardb.ColumnType.Timestamp: [np.dtype('datetime64[ns]')]
+ quasardb.ColumnType.String: [np.dtype("U")],
+ quasardb.ColumnType.Symbol: [np.dtype("U")],
+ quasardb.ColumnType.Int64: [np.dtype("i8"), np.dtype("i4"), np.dtype("i2")],
+ quasardb.ColumnType.Double: [np.dtype("f8"), np.dtype("f4")],
+ quasardb.ColumnType.Blob: [np.dtype("S"), np.dtype("O")],
+ quasardb.ColumnType.Timestamp: [np.dtype("datetime64[ns]")],
 }
 
 
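Note: the `_ctype_to_dtype` map above is what drives dtype validation, with the first entry per column type being the preferred dtype. A minimal sketch of the acceptance rule it encodes (plain numpy, no cluster required):

```python
import numpy as np

# Accepted dtypes for an Int64 column, per the mapping above;
# 'i8' is preferred, 'i4' and 'i2' are natively convertible.
accepted = [np.dtype("i8"), np.dtype("i4"), np.dtype("i2")]

xs = np.array([1, 2, 3], dtype="i4")
assert xs.dtype in accepted        # passes dtype validation

ys = np.array([1.5, 2.5], dtype="f8")
assert ys.dtype not in accepted    # would surface as IncompatibleDtypeError
```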
@@ -141,9 +150,12 @@ def _coerce_dtype(dtype, columns):
  # Any columns not provided will have a 'None' dtype.
  dtype_ = [None] * len(columns)
 
- for (k, dt) in dtype.items():
+ for k, dt in dtype.items():
  if not k in offsets:
- logger.warn("Forced dtype provided for column '%s' = %s, but that column is not found in the table. Skipping...", k, )
+ logger.warn(
+ "Forced dtype provided for column '%s' = %s, but that column is not found in the table. Skipping...",
+ k,
+ )
 
  i = offsets[k]
  dtype_[i] = dt
@@ -151,16 +163,22 @@ def _coerce_dtype(dtype, columns):
  dtype = dtype_
 
  if type(dtype) is not list:
- raise ValueError("Forced dtype argument provided, but the argument has an incompatible type. Expected: list-like or dict-like, got: {}".format(type(dtype)))
+ raise ValueError(
+ "Forced dtype argument provided, but the argument has an incompatible type. Expected: list-like or dict-like, got: {}".format(
+ type(dtype)
+ )
+ )
 
  if len(dtype) is not len(columns):
- raise ValueError("Expected exactly one dtype for each column, but %d dtypes were provided for %d columns".format(len(dtype), len(columns)))
+ raise ValueError(
+ "Expected exactly one dtype for each column, but %d dtypes were provided for %d columns".format(
+ len(dtype), len(columns)
+ )
+ )
 
  return dtype
 
 
-
-
  def _add_desired_dtypes(dtype, columns):
  """
  When infer_types=True, this function sets the 'desired' dtype for each of the columns.
@@ -174,7 +192,12 @@ def _add_desired_dtypes(dtype, columns):
  if dtype[i] is None:
  (cname, ctype) = columns[i]
  dtype_ = _best_dtype_for_ctype(ctype)
- logger.debug("using default dtype '%s' for column '%s' with type %s", dtype_, cname, ctype)
+ logger.debug(
+ "using default dtype '%s' for column '%s' with type %s",
+ dtype_,
+ cname,
+ ctype,
+ )
  dtype[i] = dtype_
 
  return dtype
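Note: `_coerce_dtype` (reformatted above) accepts the forced `dtype` argument either as a positional list or as a dict keyed by column name, and expands a dict into a list aligned with the table's columns. A rough sketch of that expansion, with invented column names:

```python
import numpy as np

# Hypothetical table layout: one (name, type) pair per column.
columns = [("open", None), ("close", None), ("volume", None)]
offsets = {cname: i for i, (cname, _) in enumerate(columns)}

dtype = {"volume": np.dtype("i8")}  # dict-style forced dtype

# Columns without a forced dtype keep None (filled in later by
# _add_desired_dtypes when infer_types=True).
dtype_ = [None] * len(columns)
for k, dt in dtype.items():
    if k in offsets:
        dtype_[offsets[k]] = dt

assert dtype_ == [None, None, np.dtype("i8")]
```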
@@ -196,8 +219,11 @@ def _is_all_masked(xs):
  # built-ins for object arrays
  return all(x is None for x in xs)
 
-
- logger.debug("{} is not a masked array, not convertible to requested type... ".format(type(xs)))
+ logger.debug(
+ "{} is not a masked array, not convertible to requested type... ".format(
+ type(xs)
+ )
+ )
 
  # This array is *not* a masked array, it's *not* convertible to the type we want,
  # and it's *not* an object array.
@@ -208,7 +234,7 @@ def _is_all_masked(xs):
 
 
  def dtypes_equal(lhs, rhs):
- if lhs.kind == 'U' or lhs.kind == 'S':
+ if lhs.kind == "U" or lhs.kind == "S":
  # Unicode and string data has variable length encoding, which means their itemsize
  # can be anything.
  #
@@ -236,43 +262,60 @@ def _validate_dtypes(data, columns):
  (cname, ctype, provided_dtype, expected_dtype) = e
  return
 
- for (data_, (cname, ctype)) in zip(data, columns):
+ for data_, (cname, ctype) in zip(data, columns):
  expected_ = _ctype_to_dtype[ctype]
 
  logger.debug("data_.dtype = %s, expected_ = %s", data_.dtype, expected_)
 
  if _dtype_found(data_.dtype, expected_) == False:
- errors.append(IncompatibleDtypeError(cname=cname, ctype=ctype, provided=data_.dtype, expected=expected_))
+ errors.append(
+ IncompatibleDtypeError(
+ cname=cname, ctype=ctype, provided=data_.dtype, expected=expected_
+ )
+ )
 
  if len(errors) > 0:
  raise IncompatibleDtypeErrors(errors)
 
+
  def _coerce_deduplicate(deduplicate, deduplication_mode, columns):
  """
  Throws an error when 'deduplicate' options are incorrect.
  """
  cnames = [cname for (cname, ctype) in columns]
 
- if deduplication_mode not in ['drop', 'upsert']:
- raise RuntimeError("deduplication_mode should be one of ['drop', 'upsert'], got: {}".format(deduplication_mode))
+ if deduplication_mode not in ["drop", "upsert"]:
+ raise RuntimeError(
+ "deduplication_mode should be one of ['drop', 'upsert'], got: {}".format(
+ deduplication_mode
+ )
+ )
 
  if isinstance(deduplicate, bool):
  return deduplicate
 
  # Special value of $timestamp, hardcoded
- if isinstance(deduplicate, str) and deduplicate == '$timestamp':
- deduplicate = ['$timestamp']
- cnames.append('$timestamp')
+ if isinstance(deduplicate, str) and deduplicate == "$timestamp":
+ deduplicate = ["$timestamp"]
+ cnames.append("$timestamp")
 
  if not isinstance(deduplicate, list):
- raise TypeError("drop_duplicates should be either a bool or a list, got: " + type(deduplicate))
+ raise TypeError(
+ "drop_duplicates should be either a bool or a list, got: "
+ + type(deduplicate)
+ )
 
  for column_name in deduplicate:
  if not column_name in cnames:
- raise RuntimeError("Provided deduplication column name '{}' not found in table columns.".format(column_name))
+ raise RuntimeError(
+ "Provided deduplication column name '{}' not found in table columns.".format(
+ column_name
+ )
+ )
 
  return deduplicate
 
+
  def _clean_nulls(xs, dtype):
  """
  Numpy's masked arrays have a downside that in case they're not able to convert a (masked!) value to
@@ -291,16 +334,16 @@ def _clean_nulls(xs, dtype):
 
 
  assert ma.isMA(xs)
 
- if xs.dtype is not np.dtype('object'):
+ if xs.dtype is not np.dtype("object"):
  return xs
 
  fill_value = None
  if dtype == np.float64 or dtype == np.float32 or dtype == np.float16:
- fill_value = float('nan')
+ fill_value = float("nan")
  elif dtype == np.int64 or dtype == np.int32 or dtype == np.int16:
  fill_value = -1
- elif dtype == np.dtype('datetime64[ns]'):
- fill_value = np.datetime64('nat')
+ elif dtype == np.dtype("datetime64[ns]"):
+ fill_value = np.datetime64("nat")
 
  mask = xs.mask
  xs_ = xs.filled(fill_value)
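Note: the fill values selected in `_clean_nulls` above are what make masked object arrays convertible; masked slots are rewritten to NaN for floats, -1 for integers, and NaT for `datetime64[ns]` before conversion. A standalone sketch of the mechanism in plain numpy:

```python
import numpy as np
import numpy.ma as ma

# Object array with one masked (null) entry, targeting float64.
xs = ma.array([1.0, None, 3.0], mask=[False, True, False], dtype=object)

filled = xs.filled(float("nan"))        # masked slot becomes NaN
cleaned = ma.array(filled, mask=xs.mask)
converted = cleaned.astype(np.float64)  # now converts without error

assert np.isnan(converted.data[1])
```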
@@ -308,7 +351,6 @@ def _clean_nulls(xs, dtype):
  return ma.array(xs_, mask=mask)
 
 
-
  def _coerce_data(data, dtype):
  """
  Coerces each numpy array of `data` to the dtype present in `dtype`.
@@ -325,7 +367,12 @@ def _coerce_data(data, dtype):
 
 
  assert ma.isMA(data_)
- logger.debug("data for column with offset %d was provided in dtype '%s', but need '%s': converting data...", i, data_.dtype, dtype_)
+ logger.debug(
+ "data for column with offset %d was provided in dtype '%s', but need '%s': converting data...",
+ i,
+ data_.dtype,
+ dtype_,
+ )
 
  logger.debug("dtype of data[%d] before: %s", i, data_.dtype)
  logger.debug("type of data[%d] after: %s", i, type(data_))
@@ -338,14 +385,20 @@ def _coerce_data(data, dtype):
  # One 'bug' is that, if everything is masked, the underlying data type can be
  # pretty much anything.
  if _is_all_masked(data_):
- logger.debug("array completely empty, re-initializing to empty array of '%s'", dtype_)
- data[i] = ma.masked_all(ma.size(data_),
- dtype=dtype_)
+ logger.debug(
+ "array completely empty, re-initializing to empty array of '%s'",
+ dtype_,
+ )
+ data[i] = ma.masked_all(ma.size(data_), dtype=dtype_)
 
  # Another 'bug' is that when the input data is objects, we may have null-like values (like pd.NA)
  # that cannot easily be converted to, say, float.
  else:
- logger.error("An error occured while coercing input data type from dtype '%s' to dtype '%s': ", data_.dtype, dtype_)
+ logger.error(
+ "An error occured while coercing input data type from dtype '%s' to dtype '%s': ",
+ data_.dtype,
+ dtype_,
+ )
  logger.exception(err)
  raise err
 
@@ -358,6 +411,7 @@ def _coerce_data(data, dtype):
 
  return data
 
+
  def _probe_length(xs):
  """
  Returns the length of the first non-null array in `xs`, or None if all arrays
@@ -372,6 +426,7 @@ def _probe_length(xs):
 
  return None
 
+
  def _ensure_list(xs, cinfos):
  """
  If input data is a dict, ensures it's converted to a list with the correct
@@ -422,12 +477,42 @@ def _coerce_retries(retries) -> quasardb.RetryOptions:
  elif isinstance(retries, quasardb.RetryOptions):
  return retries
  else:
- raise TypeError("retries should either be an integer or quasardb.RetryOptions, got: " + type(retries))
+ raise TypeError(
+ "retries should either be an integer or quasardb.RetryOptions, got: "
+ + type(retries)
+ )
+
+
+ def _kwarg_deprecation_warning(
+ old_kwarg, old_value, new_kwargs, new_values, stacklevel
+ ):
+ new_declaration = ", ".join(
+ f"{new_kwarg}={new_value}"
+ for new_kwarg, new_value in zip(new_kwargs, new_values)
+ )
+ warnings.warn(
+ f"The argument '{old_kwarg}' <{type(old_value).__name__}> is deprecated and will be removed in a future version. "
+ f"Please use '{new_declaration}' instead.",
+ DeprecationWarning,
+ stacklevel=stacklevel + 1,
+ )
+
+
+ def _type_check(var, var_name, target_type, raise_error=True, allow_none=True):
+ if allow_none and var is None:
+ return True
+ if not isinstance(var, target_type):
+ if raise_error:
+ raise quasardb.quasardb.InvalidArgumentError(
+ f"Invalid '{var_name}' type, expected: {target_type}, got: {type(var)}"
+ )
+ return False
+ return True
 
 
  def ensure_ma(xs, dtype=None):
  if isinstance(dtype, list):
- assert(isinstance(xs, list) == True)
+ assert isinstance(xs, list) == True
  return [ensure_ma(xs_, dtype_) for (xs_, dtype_) in zip(xs, dtype)]
 
  # Don't bother if we're already a masked array
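Note: the two helpers added above (`_type_check` and `_kwarg_deprecation_warning`) drive the kwarg migration in `write_arrays` further down. A sketch of the warning the deprecated kwargs now produce; this calls the private helper directly, purely for illustration, and assumes it remains importable from `quasardb.numpy`:

```python
import warnings
import quasardb
from quasardb.numpy import _kwarg_deprecation_warning  # private helper

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Roughly what write_arrays(..., fast=True) triggers internally:
    _kwarg_deprecation_warning(
        "fast", True, ["push_mode"], [quasardb.WriterPushMode.Fast], 1
    )

assert issubclass(caught[0].category, DeprecationWarning)
# Message reads approximately: "The argument 'fast' <bool> is deprecated
# and will be removed in a future version. Please use
# 'push_mode=WriterPushMode.Fast' instead."
```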
@@ -440,7 +525,7 @@ def ensure_ma(xs, dtype=None):
 
  logger.debug("coercing array with dtype: %s", xs.dtype)
 
- if xs.dtype.kind in ['O', 'U', 'S']:
+ if xs.dtype.kind in ["O", "U", "S"]:
  logger.debug("Data is object-like, masking None values")
 
  mask = xs == None
@@ -450,21 +535,17 @@ def ensure_ma(xs, dtype=None):
  return ma.masked_invalid(xs, copy=False)
 
 
- def read_array(table=None,
- column=None,
- ranges=None):
+ def read_array(table=None, column=None, ranges=None):
  if table is None:
  raise RuntimeError("A table is required.")
 
  if column is None:
  raise RuntimeError("A column is required.")
 
- kwargs = {
- 'column': column
- }
+ kwargs = {"column": column}
 
  if ranges is not None:
- kwargs['ranges'] = ranges
+ kwargs["ranges"] = ranges
 
  read_with = {
  quasardb.ColumnType.Double: table.double_get_ranges,
@@ -482,12 +563,8 @@ def read_array(table=None,
 
 
  def write_array(
- data=None,
- index=None,
- table=None,
- column=None,
- dtype=None,
- infer_types=True):
+ data=None, index=None, table=None, column=None, dtype=None, infer_types=True
+ ):
  """
  Write a Numpy array to a single column.
 
@@ -527,9 +604,8 @@ def write_array(
  if index is None:
  raise RuntimeError("An index numpy timestamp array is required.")
 
-
  data = ensure_ma(data, dtype=dtype)
- ctype = table.column_type_by_id(column)
+ ctype = table.column_type_by_id(column)
 
  # We try to reuse some of the other functions, which assume array-like
  # shapes for column info and data. It's a bit hackish, but actually works
@@ -564,28 +640,39 @@ def write_array(
  quasardb.ColumnType.Timestamp: table.timestamp_insert,
  }
 
- logger.info("Writing array (%d rows of dtype %s) to columns %s.%s (type %s)", len(data), data.dtype, table.get_name(), column, ctype)
+ logger.info(
+ "Writing array (%d rows of dtype %s) to columns %s.%s (type %s)",
+ len(data),
+ data.dtype,
+ table.get_name(),
+ column,
+ ctype,
+ )
  write_with[ctype](column, index, data)
 
+
  def write_arrays(
- data,
- cluster,
- table = None,
- *,
- dtype = None,
- index = None,
- _async = False,
- fast = False,
- truncate = False,
- deduplicate = False,
- deduplication_mode = 'drop',
- infer_types = True,
- writer = None,
- write_through = False,
- retries = 3,
-
- # We accept additional kwargs that will be passed through the writer.push() methods
- **kwargs):
+ data,
+ cluster,
+ table=None,
+ *,
+ dtype=None,
+ index=None,
+ # TODO: Set the default push_mode after removing _async, fast and truncate
+ push_mode=None,
+ _async=False,
+ fast=False,
+ truncate=False,
+ truncate_range=None,
+ deduplicate=False,
+ deduplication_mode="drop",
+ infer_types=True,
+ writer=None,
+ write_through=True,
+ retries=3,
+ # We accept additional kwargs that will be passed through the writer.push() methods
+ **kwargs,
+ ):
  """
  Write multiple aligned numpy arrays to a table.
 
@@ -649,13 +736,32 @@ def write_arrays(
  Defaults to True. For production use cases where you want to avoid implicit conversions,
  we recommend setting this to False.
 
+ push_mode: optional quasardb.WriterPushMode
+ The mode used for inserting data. Can be either a string or a `WriterPushMode` enumeration item.
+ Available options:
+ * `Truncate`: Truncate (also referred to as upsert) the data in-place. Will detect time range
+ to truncate from the time range inside the dataframe.
+ * `Async`: Uses asynchronous insertion API where commits are buffered server-side and
+ acknowledged before they are written to disk. If you insert to the same table from
+ multiple processes, setting this to True may improve performance.
+ * `Fast`: Whether to use 'fast push'. If you incrementally add small batches of data to table,
+ you may see better performance if you set this to True.
+ * `Transactional`: Ensures full transactional consistency.
+
+ Defaults to `Transactional`.
+
  truncate: optional bool
+ **DEPRECATED** – Use `push_mode=WriterPushMode.Truncate` instead.
  Truncate (also referred to as upsert) the data in-place. Will detect time range to truncate
  from the time range inside the dataframe.
 
  Defaults to False.
 
+ truncate_range: optional tuple
+ Time range to truncate from the time range inside the dataframe.
+
  _async: optional bool
+ **DEPRECATED** – Use `push_mode=WriterPushMode.Async` instead.
  If true, uses asynchronous insertion API where commits are buffered server-side and
  acknowledged before they are written to disk. If you insert to the same table from
  multiple processes, setting this to True may improve performance.
@@ -663,6 +769,7 @@ def write_arrays(
  Defaults to False.
 
  fast: optional bool
+ **DEPRECATED** – Use `push_mode=WriterPushMode.Fast` instead.
  Whether to use 'fast push'. If you incrementally add small batches of data to table,
  you may see better performance if you set this to True.
 
@@ -691,34 +798,60 @@ def write_arrays(
 
  if table:
  logger.debug("table explicitly provided, assuming single-table write")
- return write_arrays([(table, data)],
- cluster,
- table=None,
- dtype=dtype,
- index=index,
- _async=_async,
- fast=fast,
- truncate=truncate,
- deduplicate=deduplicate,
- deduplication_mode=deduplication_mode,
- infer_types=infer_types,
- write_through=write_through,
- writer=writer,
- retries=retries,
- **kwargs)
-
+ data = [(table, data)]
+ table = None
+
+ _type_check(push_mode, "push_mode", target_type=quasardb.WriterPushMode)
+ deprecation_stacklevel = kwargs.pop("deprecation_stacklevel", 1) + 1
+
+ if isinstance(truncate, tuple):
+ # Especial case, truncate might be a tuple indicating the range.
+ _kwarg_deprecation_warning(
+ "truncate",
+ truncate,
+ ["push_mode", "truncate_range"],
+ [quasardb.WriterPushMode.Truncate, truncate],
+ deprecation_stacklevel,
+ )
+ truncate_range = truncate_range or truncate
+ truncate = True
+
+ kwarg_to_mode = {
+ # "kwarg": (kwarg_type, kwarg_push_mode, is_deprecated)
+ "fast": (bool, quasardb.WriterPushMode.Fast, True),
+ "_async": (bool, quasardb.WriterPushMode.Async, True),
+ "truncate": (bool, quasardb.WriterPushMode.Truncate, True),
+ "truncate_range": (tuple, quasardb.WriterPushMode.Truncate, False),
+ }
 
- ret = []
+ for kwarg, info in kwarg_to_mode.items():
+ expected_type, mode, deprecated = info
+ kwarg_value = locals()[kwarg]
+ _type_check(kwarg_value, kwarg, target_type=expected_type)
+
+ if kwarg_value:
+ if push_mode and push_mode != mode:
+ raise quasardb.quasardb.InvalidArgumentError(
+ f"Found '{kwarg}' in kwargs, but push mode is already set to {push_mode}"
+ )
+ push_mode = mode
+ if deprecated:
+ _kwarg_deprecation_warning(
+ kwarg, kwarg_value, ["push_mode"], [mode], deprecation_stacklevel
+ )
+
+ if not push_mode:
+ push_mode = quasardb.WriterPushMode.Transactional
 
  # Create batch column info from dataframe
  if writer is None:
  writer = cluster.writer()
 
+ ret = []
  n_rows = 0
-
  push_data = quasardb.WriterData()
 
- for (table, data_) in data:
+ for table, data_ in data:
  # Acquire reference to table if string is provided
  if isinstance(table, str):
  table = table_cache.lookup(table, cluster)
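Note: with the resolution logic above, the old boolean kwargs keep working but are translated into `push_mode`, while conflicting combinations raise `InvalidArgumentError`. A hedged sketch of a call-site migration; the cluster URI, table name, and payload are placeholders:

```python
import numpy as np
import quasardb
import quasardb.numpy as qdbnp

cluster = quasardb.Cluster("qdb://127.0.0.1:2836")  # placeholder URI

idx = np.array(["2024-01-01"], dtype="datetime64[ns]")
data = {"$timestamp": idx, "close": np.array([100.0])}

# Old style: still works in dev6, but now emits a DeprecationWarning.
qdbnp.write_arrays(data, cluster, table="my_table", fast=True)

# New style: equivalent, no warning.
qdbnp.write_arrays(
    data, cluster, table="my_table", push_mode=quasardb.WriterPushMode.Fast
)

# Mixing a deprecated flag with a conflicting push_mode raises
# InvalidArgumentError:
#   qdbnp.write_arrays(data, cluster, table="my_table",
#                      _async=True,
#                      push_mode=quasardb.WriterPushMode.Fast)
```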
@@ -729,10 +862,15 @@ def write_arrays(
  assert type(dtype) is list
  assert len(dtype) is len(cinfos)
 
-
- if index is None and isinstance(data_, dict) and '$timestamp' in data_:
- index_ = data_.pop('$timestamp')
- assert '$timestamp' not in data_
+ if index is None and isinstance(data_, dict) and "$timestamp" in data_:
+ # Create shallow copy of `data_` so that we don't modify the reference, i.e.
+ # delete keys.
+ #
+ # This ensures that the user can call the same function multiple times without
+ # side-effects.
+ data_ = data_.copy()
+ index_ = data_.pop("$timestamp")
+ assert "$timestamp" not in data_
  elif index is not None:
  index_ = index
  else:
@@ -751,7 +889,6 @@ def write_arrays(
  data_ = ensure_ma(data_, dtype=dtype)
  data_ = _coerce_data(data_, dtype)
 
-
  # Just some additional friendly information about incorrect dtypes, we'd
  # prefer to have this information thrown from Python instead of native
  # code as it generally makes for somewhat better error context.
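Note: the shallow copy introduced in hunk `@@ -729,10 +862,15` above removes a side effect present in dev4, where popping `'$timestamp'` mutated the caller's dict and made a second call with the same object fail. Continuing the placeholder setup from the previous sketch:

```python
# dev4 behavior: the first call popped '$timestamp' out of the caller's
# dict, so a repeated call could not find an index. dev6 pops from an
# internal shallow copy instead.
qdbnp.write_arrays(data, cluster, table="my_table")
assert "$timestamp" in data          # still present after the call
qdbnp.write_arrays(data, cluster, table="my_table")  # safe to repeat
```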
@@ -778,37 +915,27 @@ def write_arrays(
  # The initial use case was that so we can add additional parameters for test mocks, e.g. `mock_failures` so that
  # we can validate the retry functionality.
  push_kwargs = kwargs
- push_kwargs['deduplicate'] = deduplicate
- push_kwargs['deduplication_mode'] = deduplication_mode
- push_kwargs['write_through'] = write_through
- push_kwargs['retries'] = retries
+ push_kwargs["deduplicate"] = deduplicate
+ push_kwargs["deduplication_mode"] = deduplication_mode
+ push_kwargs["write_through"] = write_through
+ push_kwargs["retries"] = retries
+ push_kwargs["push_mode"] = push_mode
+ if truncate_range:
+ push_kwargs["range"] = truncate_range
 
  logger.debug("pushing %d rows", n_rows)
  start = time.time()
 
- if fast is True:
- push_kwargs['push_mode'] = quasardb.WriterPushMode.Fast
- elif truncate is True:
- push_kwargs['push_mode'] = quasardb.WriterPushMode.Truncate
- elif isinstance(truncate, tuple):
- push_kwargs['push_mode'] = quasardb.WriterPushMode.Truncate
- push_kwargs['range'] = truncate
- elif _async is True:
- push_kwargs['push_mode'] = quasardb.WriterPushMode.Async
- else:
- push_kwargs['push_mode'] = quasardb.WriterPushMode.Transactional
-
  writer.push(push_data, **push_kwargs)
 
- logger.debug("pushed %d rows in %s seconds",
- n_rows, (time.time() - start))
+ logger.debug("pushed %d rows in %s seconds", n_rows, (time.time() - start))
 
  return ret
 
 
  def _xform_query_results(xs, index, dict):
  if len(xs) == 0:
- return (np.array([], np.dtype('datetime64[ns]')), np.array([]))
+ return (np.array([], np.dtype("datetime64[ns]")), np.array([]))
 
  n = None
  for x in xs:
@@ -822,8 +949,8 @@ def _xform_query_results(xs, index, dict):
  if index is None:
  # Generate a range, put it in the front of the result list,
  # recurse and tell the function to use that index.
- xs_ = [('$index', np.arange(n))] + xs
- return _xform_query_results(xs_, '$index', dict)
+ xs_ = [("$index", np.arange(n))] + xs
+ return _xform_query_results(xs_, "$index", dict)
 
  if isinstance(index, str):
  for i in range(len(xs)):
@@ -833,10 +960,18 @@ def _xform_query_results(xs, index, dict):
  # recurse with that offset
  return _xform_query_results(xs, i, dict)
 
- raise KeyError("Unable to resolve index column: column not found in results: {}".format(index))
+ raise KeyError(
+ "Unable to resolve index column: column not found in results: {}".format(
+ index
+ )
+ )
 
  if not isinstance(index, int):
- raise TypeError("Unable to resolve index column: unrecognized type {}: {}".format(type(index), index))
+ raise TypeError(
+ "Unable to resolve index column: unrecognized type {}: {}".format(
+ type(index), index
+ )
+ )
 
  idx = xs[index][1]
  del xs[index]
  del xs[index]
@@ -845,7 +980,9 @@ def _xform_query_results(xs, index, dict):
845
980
  # masked items: we cannot not have an index for a certain row.
846
981
  if ma.isMA(idx):
847
982
  if ma.count_masked(idx) > 0:
848
- raise ValueError("Invalid index: null values detected. An index is never allowed to have null values.")
983
+ raise ValueError(
984
+ "Invalid index: null values detected. An index is never allowed to have null values."
985
+ )
849
986
 
850
987
  assert isinstance(idx.data, np.ndarray)
851
988
  idx = idx.data
@@ -860,10 +997,7 @@ def _xform_query_results(xs, index, dict):
860
997
  return (idx, xs_)
861
998
 
862
999
 
863
- def query(cluster,
864
- query,
865
- index=None,
866
- dict=False):
1000
+ def query(cluster, query, index=None, dict=False):
867
1001
  """
868
1002
  Execute a query and return the results as numpy arrays. The shape of the return value
869
1003
  is always: