quasardb-3.14.2.dev3-cp39-cp39-win_amd64.whl → quasardb-3.14.2.dev5-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quasardb/INSTALL.vcxproj +5 -4
- quasardb/__init__.py +21 -7
- quasardb/cmake_install.cmake +6 -0
- quasardb/date/ALL_BUILD.vcxproj +9 -8
- quasardb/date/CMakeFiles/Export/df49adab93b9e0c10c64f72458b31971/dateTargets.cmake +2 -2
- quasardb/date/CMakeFiles/generate.stamp.depend +4 -4
- quasardb/date/INSTALL.vcxproj +5 -4
- quasardb/date/cmake_install.cmake +6 -0
- quasardb/date/dateTargets.cmake +2 -2
- quasardb/extensions/writer.py +59 -61
- quasardb/firehose.py +24 -22
- quasardb/numpy/__init__.py +181 -120
- quasardb/pandas/__init__.py +145 -95
- quasardb/pool.py +13 -2
- quasardb/pybind11/ALL_BUILD.vcxproj +9 -8
- quasardb/pybind11/CMakeFiles/generate.stamp.depend +14 -14
- quasardb/pybind11/INSTALL.vcxproj +5 -4
- quasardb/pybind11/cmake_install.cmake +6 -0
- quasardb/qdb_api.dll +0 -0
- quasardb/quasardb.cp39-win_amd64.pyd +0 -0
- quasardb/range-v3/ALL_BUILD.vcxproj +9 -8
- quasardb/range-v3/CMakeFiles/Export/d94ef200eca10a819b5858b33e808f5b/range-v3-targets.cmake +2 -2
- quasardb/range-v3/CMakeFiles/generate.stamp.depend +11 -11
- quasardb/range-v3/INSTALL.vcxproj +5 -4
- quasardb/range-v3/cmake_install.cmake +6 -0
- quasardb/range-v3/range-v3-config.cmake +2 -2
- quasardb/range-v3/range.v3.headers.vcxproj +9 -8
- quasardb/stats.py +92 -80
- quasardb/table_cache.py +5 -1
- {quasardb-3.14.2.dev3.dist-info → quasardb-3.14.2.dev5.dist-info}/METADATA +13 -8
- quasardb-3.14.2.dev5.dist-info/RECORD +54 -0
- {quasardb-3.14.2.dev3.dist-info → quasardb-3.14.2.dev5.dist-info}/WHEEL +1 -1
- quasardb/CMakeLists.txt +0 -517
- quasardb/batch_column.hpp +0 -80
- quasardb/batch_inserter.hpp +0 -248
- quasardb/blob.hpp +0 -150
- quasardb/cluster.cpp +0 -102
- quasardb/cluster.hpp +0 -593
- quasardb/concepts.hpp +0 -322
- quasardb/continuous.cpp +0 -199
- quasardb/continuous.hpp +0 -109
- quasardb/convert/array.hpp +0 -299
- quasardb/convert/point.hpp +0 -330
- quasardb/convert/range.hpp +0 -282
- quasardb/convert/unicode.hpp +0 -598
- quasardb/convert/util.hpp +0 -22
- quasardb/convert/value.hpp +0 -782
- quasardb/convert.hpp +0 -38
- quasardb/detail/invoke.hpp +0 -0
- quasardb/detail/qdb_resource.hpp +0 -129
- quasardb/detail/retry.cpp +0 -30
- quasardb/detail/retry.hpp +0 -147
- quasardb/detail/sleep.hpp +0 -53
- quasardb/detail/ts_column.hpp +0 -224
- quasardb/detail/writer.cpp +0 -440
- quasardb/detail/writer.hpp +0 -550
- quasardb/direct_blob.hpp +0 -108
- quasardb/direct_handle.hpp +0 -83
- quasardb/direct_integer.hpp +0 -94
- quasardb/dispatch.hpp +0 -157
- quasardb/double.hpp +0 -87
- quasardb/entry.hpp +0 -273
- quasardb/error.hpp +0 -393
- quasardb/handle.cpp +0 -29
- quasardb/handle.hpp +0 -98
- quasardb/integer.hpp +0 -88
- quasardb/logger.cpp +0 -106
- quasardb/logger.hpp +0 -228
- quasardb/masked_array.hpp +0 -658
- quasardb/metrics.cpp +0 -103
- quasardb/metrics.hpp +0 -112
- quasardb/module.cpp +0 -92
- quasardb/module.hpp +0 -24
- quasardb/node.hpp +0 -132
- quasardb/numpy.cpp +0 -6
- quasardb/numpy.hpp +0 -489
- quasardb/object_tracker.hpp +0 -282
- quasardb/options.hpp +0 -273
- quasardb/perf.hpp +0 -336
- quasardb/properties.cpp +0 -41
- quasardb/properties.hpp +0 -85
- quasardb/pytypes.hpp +0 -221
- quasardb/query.cpp +0 -420
- quasardb/query.hpp +0 -92
- quasardb/reader.cpp +0 -282
- quasardb/reader.hpp +0 -256
- quasardb/remove_cvref.hpp +0 -31
- quasardb/string.hpp +0 -160
- quasardb/table.cpp +0 -257
- quasardb/table.hpp +0 -366
- quasardb/tag.hpp +0 -77
- quasardb/timestamp.hpp +0 -97
- quasardb/traits.hpp +0 -642
- quasardb/ts_iterator.hpp +0 -193
- quasardb/utils/blob_deque.hpp +0 -96
- quasardb/utils/ostream.hpp +0 -17
- quasardb/utils/permutation.hpp +0 -50
- quasardb/utils/stable_sort.hpp +0 -25
- quasardb/utils/unzip_view.hpp +0 -89
- quasardb/utils.cpp +0 -28
- quasardb/utils.hpp +0 -174
- quasardb/writer.hpp +0 -354
- quasardb-3.14.2.dev3.dist-info/RECORD +0 -124
- {quasardb-3.14.2.dev3.dist-info → quasardb-3.14.2.dev5.dist-info}/LICENSE.md +0 -0
- {quasardb-3.14.2.dev3.dist-info → quasardb-3.14.2.dev5.dist-info}/top_level.txt +0 -0
quasardb/numpy/__init__.py
CHANGED
@@ -33,7 +33,7 @@ import time
 import quasardb
 import quasardb.table_cache as table_cache

-logger = logging.getLogger('quasardb.numpy')
+logger = logging.getLogger("quasardb.numpy")


 class NumpyRequired(ImportError):
@@ -41,6 +41,7 @@ class NumpyRequired(ImportError):
     Exception raised when trying to use QuasarDB pandas integration, but
     pandas has not been installed.
     """
+
     pass


@@ -52,7 +53,7 @@ except ImportError as err:
     logger.exception(err)
     raise NumpyRequired(
         "The numpy library is required to handle numpy arrays formats"
-        ) from err
+    ) from err


 class IncompatibleDtypeError(TypeError):
@@ -68,13 +69,16 @@ class IncompatibleDtypeError(TypeError):
         super().__init__(self.msg())

     def msg(self):
-        return "Data for column '{}' with type '{}' was provided in dtype '{}' but need '{}'.".format(self.cname, self.ctype, self.provided, self.expected)
+        return "Data for column '{}' with type '{}' was provided in dtype '{}' but need '{}'.".format(
+            self.cname, self.ctype, self.provided, self.expected
+        )


 class IncompatibleDtypeErrors(TypeError):
     """
     Wraps multiple dtype errors
     """
+
     def __init__(self, xs):
         self.xs = xs
         super().__init__(self.msg())
@@ -82,29 +86,33 @@ class IncompatibleDtypeErrors(TypeError):
     def msg(self):
         return "\n".join(x.msg() for x in self.xs)

+
 class InvalidDataCardinalityError(ValueError):
     """
     Raised when the provided data arrays doesn't match the table's columns.
     """
+
     def __init__(self, data, cinfos):
         self.data = data
         self.cinfos = cinfos
         super().__init__(self.msg())

     def msg(self):
-        return "Provided data array length '{}' exceeds amount of table columns '{}', unable to map data to columns".format(len(self.data), len(self.cinfos))
+        return "Provided data array length '{}' exceeds amount of table columns '{}', unable to map data to columns".format(
+            len(self.data), len(self.cinfos)
+        )


 # Based on QuasarDB column types, which dtype do we accept?
 # First entry will always be the 'preferred' dtype, other ones
 # those that we can natively convert in native code.
 _ctype_to_dtype = {
-    quasardb.ColumnType.String: [np.dtype('U')],
-    quasardb.ColumnType.Symbol: [np.dtype('U')],
-    quasardb.ColumnType.Int64: [np.dtype('i8'), np.dtype('i4'), np.dtype('i2')],
-    quasardb.ColumnType.Double: [np.dtype('f8'), np.dtype('f4')],
-    quasardb.ColumnType.Blob: [np.dtype('S'), np.dtype('O')],
-    quasardb.ColumnType.Timestamp: [np.dtype('datetime64[ns]')],
+    quasardb.ColumnType.String: [np.dtype("U")],
+    quasardb.ColumnType.Symbol: [np.dtype("U")],
+    quasardb.ColumnType.Int64: [np.dtype("i8"), np.dtype("i4"), np.dtype("i2")],
+    quasardb.ColumnType.Double: [np.dtype("f8"), np.dtype("f4")],
+    quasardb.ColumnType.Blob: [np.dtype("S"), np.dtype("O")],
+    quasardb.ColumnType.Timestamp: [np.dtype("datetime64[ns]")],
 }

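Note: the first entry of each list in `_ctype_to_dtype` is the 'preferred' dtype that type inference coerces toward; the remaining entries are accepted and converted natively. A minimal standalone sketch of that lookup (plain string keys stand in for the quasardb.ColumnType values so it runs without a cluster):

    import numpy as np

    # Mirrors the mapping in the hunk above, keyed by plain strings.
    ctype_to_dtype = {
        "int64": [np.dtype("i8"), np.dtype("i4"), np.dtype("i2")],
        "double": [np.dtype("f8"), np.dtype("f4")],
    }

    def best_dtype_for_ctype(ctype):
        # The first entry is the preferred dtype.
        return ctype_to_dtype[ctype][0]

    assert best_dtype_for_ctype("int64") == np.dtype("i8")
    assert np.dtype("f4") in ctype_to_dtype["double"]  # natively convertible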
@@ -141,9 +149,12 @@ def _coerce_dtype(dtype, columns):
     # Any columns not provided will have a 'None' dtype.
     dtype_ = [None] * len(columns)

-    for (k, dt) in dtype.items():
+    for k, dt in dtype.items():
         if not k in offsets:
-            logger.warn("Forced dtype provided for column '%s' = %s, but that column is not found in the table. Skipping...", k)
+            logger.warn(
+                "Forced dtype provided for column '%s' = %s, but that column is not found in the table. Skipping...",
+                k,
+            )

         i = offsets[k]
         dtype_[i] = dt
@@ -151,16 +162,22 @@ def _coerce_dtype(dtype, columns):
     dtype = dtype_

     if type(dtype) is not list:
-        raise ValueError("Forced dtype argument provided, but the argument has an incompatible type. Expected: list-like or dict-like, got: {}".format(type(dtype)))
+        raise ValueError(
+            "Forced dtype argument provided, but the argument has an incompatible type. Expected: list-like or dict-like, got: {}".format(
+                type(dtype)
+            )
+        )

     if len(dtype) is not len(columns):
-        raise ValueError("Expected exactly one dtype for each column, but %d dtypes were provided for %d columns".format(len(dtype), len(columns)))
+        raise ValueError(
+            "Expected exactly one dtype for each column, but %d dtypes were provided for %d columns".format(
+                len(dtype), len(columns)
+            )
+        )

     return dtype


-
-
 def _add_desired_dtypes(dtype, columns):
     """
     When infer_types=True, this function sets the 'desired' dtype for each of the columns.
@@ -174,7 +191,12 @@ def _add_desired_dtypes(dtype, columns):
         if dtype[i] is None:
             (cname, ctype) = columns[i]
             dtype_ = _best_dtype_for_ctype(ctype)
-            logger.debug("using default dtype '%s' for column '%s' with type %s", dtype_, cname, ctype)
+            logger.debug(
+                "using default dtype '%s' for column '%s' with type %s",
+                dtype_,
+                cname,
+                ctype,
+            )
             dtype[i] = dtype_

     return dtype
@@ -196,8 +218,11 @@ def _is_all_masked(xs):
         # built-ins for object arrays
         return all(x is None for x in xs)

-    logger.debug("{} is not a masked array, not convertible to requested type... ".format(type(xs)))
-
+    logger.debug(
+        "{} is not a masked array, not convertible to requested type... ".format(
+            type(xs)
+        )
+    )

     # This array is *not* a masked array, it's *not* convertible to the type we want,
     # and it's *not* an object array.
@@ -208,7 +233,7 @@ def _is_all_masked(xs):


 def dtypes_equal(lhs, rhs):
-    if lhs.kind == 'U' or lhs.kind == 'S':
+    if lhs.kind == "U" or lhs.kind == "S":
         # Unicode and string data has variable length encoding, which means their itemsize
         # can be anything.
         #
@@ -236,43 +261,60 @@ def _validate_dtypes(data, columns):
         (cname, ctype, provided_dtype, expected_dtype) = e
         return

-    for (data_, (cname, ctype)) in zip(data, columns):
+    for data_, (cname, ctype) in zip(data, columns):
         expected_ = _ctype_to_dtype[ctype]

         logger.debug("data_.dtype = %s, expected_ = %s", data_.dtype, expected_)

         if _dtype_found(data_.dtype, expected_) == False:
-            errors.append(IncompatibleDtypeError(cname=cname, ctype=ctype, provided=data_.dtype, expected=expected_))
+            errors.append(
+                IncompatibleDtypeError(
+                    cname=cname, ctype=ctype, provided=data_.dtype, expected=expected_
+                )
+            )

     if len(errors) > 0:
         raise IncompatibleDtypeErrors(errors)

+
 def _coerce_deduplicate(deduplicate, deduplication_mode, columns):
     """
     Throws an error when 'deduplicate' options are incorrect.
     """
     cnames = [cname for (cname, ctype) in columns]

-    if deduplication_mode not in ['drop', 'upsert']:
-        raise RuntimeError("deduplication_mode should be one of ['drop', 'upsert'], got: {}".format(deduplication_mode))
+    if deduplication_mode not in ["drop", "upsert"]:
+        raise RuntimeError(
+            "deduplication_mode should be one of ['drop', 'upsert'], got: {}".format(
+                deduplication_mode
+            )
+        )

     if isinstance(deduplicate, bool):
         return deduplicate

     # Special value of $timestamp, hardcoded
-    if isinstance(deduplicate, str) and deduplicate == '$timestamp':
-        deduplicate = ['$timestamp']
-        cnames.append('$timestamp')
+    if isinstance(deduplicate, str) and deduplicate == "$timestamp":
+        deduplicate = ["$timestamp"]
+        cnames.append("$timestamp")

     if not isinstance(deduplicate, list):
-        raise TypeError("drop_duplicates should be either a bool or a list, got: " + type(deduplicate))
+        raise TypeError(
+            "drop_duplicates should be either a bool or a list, got: "
+            + type(deduplicate)
+        )

     for column_name in deduplicate:
         if not column_name in cnames:
-            raise RuntimeError("Provided deduplication column name '{}' not found in table columns.".format(column_name))
+            raise RuntimeError(
+                "Provided deduplication column name '{}' not found in table columns.".format(
+                    column_name
+                )
+            )

     return deduplicate

+
 def _clean_nulls(xs, dtype):
     """
     Numpy's masked arrays have a downside that in case they're not able to convert a (masked!) value to
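Note: per the `_coerce_deduplicate` hunk above, `deduplicate` accepts a bool, the special string "$timestamp", or a list of column names, and `deduplication_mode` must be "drop" or "upsert". A hypothetical call sketch (the cluster URI and table name are placeholders):

    import numpy as np
    import numpy.ma as ma
    import quasardb
    import quasardb.numpy as qdbnp

    timestamps = np.array(["2024-01-01", "2024-01-02"], dtype="datetime64[ns]")
    data = {"close": ma.masked_array([101.2, 102.5])}

    with quasardb.Cluster("qdb://127.0.0.1:2836") as cluster:
        qdbnp.write_arrays(
            data,
            cluster,
            table="my_table",
            index=timestamps,
            deduplicate="$timestamp",   # or True, or a list of column names
            deduplication_mode="drop",  # or "upsert"
        )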
@@ -291,16 +333,16 @@ def _clean_nulls(xs, dtype):

     assert ma.isMA(xs)

-    if xs.dtype is not np.dtype('object'):
+    if xs.dtype is not np.dtype("object"):
         return xs

     fill_value = None
     if dtype == np.float64 or dtype == np.float32 or dtype == np.float16:
-        fill_value = float('nan')
+        fill_value = float("nan")
     elif dtype == np.int64 or dtype == np.int32 or dtype == np.int16:
         fill_value = -1
-    elif dtype == np.dtype('datetime64[ns]'):
-        fill_value = np.datetime64('nat')
+    elif dtype == np.dtype("datetime64[ns]"):
+        fill_value = np.datetime64("nat")

     mask = xs.mask
     xs_ = xs.filled(fill_value)
@@ -308,7 +350,6 @@ def _clean_nulls(xs, dtype):
     return ma.array(xs_, mask=mask)


-
 def _coerce_data(data, dtype):
     """
     Coerces each numpy array of `data` to the dtype present in `dtype`.
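Note: the fill values in `_clean_nulls` (NaN, -1, NaT) exist only so that masked slots survive the dtype conversion; the original mask is reapplied afterwards, so the placeholder never surfaces as data. A standalone numpy sketch of the same pattern:

    import numpy as np
    import numpy.ma as ma

    # An object array holding a masked null that float() could not convert.
    xs = ma.masked_array([1.5, None, 3.0], mask=[False, True, False], dtype="object")

    # Fill masked slots with a castable placeholder, convert, then re-mask.
    cleaned = ma.array(xs.filled(float("nan")), mask=xs.mask).astype(np.float64)

    assert cleaned[1] is ma.masked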
@@ -325,7 +366,12 @@ def _coerce_data(data, dtype):

         assert ma.isMA(data_)

-        logger.debug("data for column with offset %d was provided in dtype '%s', but need '%s': converting data...", i, data_.dtype, dtype_)
+        logger.debug(
+            "data for column with offset %d was provided in dtype '%s', but need '%s': converting data...",
+            i,
+            data_.dtype,
+            dtype_,
+        )

         logger.debug("dtype of data[%d] before: %s", i, data_.dtype)
         logger.debug("type of data[%d] after: %s", i, type(data_))
@@ -338,14 +384,20 @@ def _coerce_data(data, dtype):
             # One 'bug' is that, if everything is masked, the underlying data type can be
             # pretty much anything.
             if _is_all_masked(data_):
-                logger.debug("array completely empty, re-initializing to empty array of '%s'", dtype_)
-
-                data[i] = ma.masked_all(ma.size(data_), dtype=dtype_)
+                logger.debug(
+                    "array completely empty, re-initializing to empty array of '%s'",
+                    dtype_,
+                )
+                data[i] = ma.masked_all(ma.size(data_), dtype=dtype_)

             # Another 'bug' is that when the input data is objects, we may have null-like values (like pd.NA)
             # that cannot easily be converted to, say, float.
             else:
-                logger.error("An error occured while coercing input data type from dtype '%s' to dtype '%s': ", data_.dtype, dtype_)
+                logger.error(
+                    "An error occured while coercing input data type from dtype '%s' to dtype '%s': ",
+                    data_.dtype,
+                    dtype_,
+                )
                 logger.exception(err)
                 raise err

@@ -358,6 +410,7 @@ def _coerce_data(data, dtype):

     return data

+
 def _probe_length(xs):
     """
     Returns the length of the first non-null array in `xs`, or None if all arrays
@@ -372,6 +425,7 @@ def _probe_length(xs):

     return None

+
 def _ensure_list(xs, cinfos):
     """
     If input data is a dict, ensures it's converted to a list with the correct
@@ -422,12 +476,15 @@ def _coerce_retries(retries) -> quasardb.RetryOptions:
     elif isinstance(retries, quasardb.RetryOptions):
         return retries
     else:
-        raise TypeError("retries should either be an integer or quasardb.RetryOptions, got: " + type(retries))
+        raise TypeError(
+            "retries should either be an integer or quasardb.RetryOptions, got: "
+            + type(retries)
+        )


 def ensure_ma(xs, dtype=None):
     if isinstance(dtype, list):
-        assert(isinstance(xs, list) == True)
+        assert isinstance(xs, list) == True
         return [ensure_ma(xs_, dtype_) for (xs_, dtype_) in zip(xs, dtype)]

     # Don't bother if we're already a masked array
@@ -440,7 +497,7 @@ def ensure_ma(xs, dtype=None):

     logger.debug("coercing array with dtype: %s", xs.dtype)

-    if xs.dtype.kind in ['O', 'U', 'S']:
+    if xs.dtype.kind in ["O", "U", "S"]:
         logger.debug("Data is object-like, masking None values")

         mask = xs == None
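Note: for object-, unicode- and bytes-like input (`kind` of "O", "U" or "S"), `ensure_ma` builds the mask from `None` comparisons, as the hunk above shows. A standalone approximation of that branch:

    import numpy as np
    import numpy.ma as ma

    xs = np.array(["a", None, "c"], dtype="O")

    # Elementwise comparison on purpose: None entries become the mask.
    mask = xs == None  # noqa: E711
    masked = ma.masked_array(xs, mask=mask)

    assert masked[1] is ma.masked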
@@ -450,21 +507,17 @@ def ensure_ma(xs, dtype=None):
     return ma.masked_invalid(xs, copy=False)


-def read_array(table=None,
-               column=None,
-               ranges=None):
+def read_array(table=None, column=None, ranges=None):
     if table is None:
         raise RuntimeError("A table is required.")

     if column is None:
         raise RuntimeError("A column is required.")

-    kwargs = {
-        'column': column
-    }
+    kwargs = {"column": column}

     if ranges is not None:
-        kwargs['ranges'] = ranges
+        kwargs["ranges"] = ranges

     read_with = {
         quasardb.ColumnType.Double: table.double_get_ranges,
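Note: `read_array` simply forwards to the column-type-specific `*_get_ranges` accessor on the table, as the dispatch dict in the hunk above shows. A hypothetical usage sketch (the cluster URI, table and column names are placeholders):

    import quasardb
    import quasardb.numpy as qdbnp

    with quasardb.Cluster("qdb://127.0.0.1:2836") as cluster:
        table = cluster.table("my_table")
        # Returns whatever the underlying get_ranges call returns for the
        # column's type; ranges is optional and narrows the time interval.
        res = qdbnp.read_array(table=table, column="close")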
@@ -482,12 +535,8 @@ def read_array(

 
 def write_array(
-        data=None,
-        index=None,
-        table=None,
-        column=None,
-        dtype=None,
-        infer_types=True):
+    data=None, index=None, table=None, column=None, dtype=None, infer_types=True
+):
     """
     Write a Numpy array to a single column.

@@ -527,9 +576,8 @@ def write_array(
     if index is None:
         raise RuntimeError("An index numpy timestamp array is required.")

-
     data = ensure_ma(data, dtype=dtype)
-    ctype =
+    ctype = table.column_type_by_id(column)

     # We try to reuse some of the other functions, which assume array-like
     # shapes for column info and data. It's a bit hackish, but actually works
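Note: a hypothetical single-column write against the reflowed `write_array` signature (the cluster URI, table and column names are placeholders; the index must be a datetime64[ns] array):

    import numpy as np
    import quasardb
    import quasardb.numpy as qdbnp

    index = np.array(
        ["2024-01-01T00:00:00", "2024-01-01T00:00:01"], dtype="datetime64[ns]"
    )
    data = np.array([1.0, 2.0], dtype="f8")

    with quasardb.Cluster("qdb://127.0.0.1:2836") as cluster:
        table = cluster.table("my_table")
        qdbnp.write_array(data=data, index=index, table=table, column="close")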
@@ -564,28 +612,36 @@ def write_array(
         quasardb.ColumnType.Timestamp: table.timestamp_insert,
     }

-    logger.info("Writing array (%d rows of dtype %s) to columns %s.%s (type %s)", len(data), data.dtype, table.get_name(), column, ctype)
+    logger.info(
+        "Writing array (%d rows of dtype %s) to columns %s.%s (type %s)",
+        len(data),
+        data.dtype,
+        table.get_name(),
+        column,
+        ctype,
+    )
     write_with[ctype](column, index, data)

+
 def write_arrays(
-        data,
-        cluster,
-        table=None,
-        *,
-        dtype=None,
-        index=None,
-        _async=False,
-        fast=False,
-        truncate=False,
-        deduplicate=False,
-        deduplication_mode='drop',
-        infer_types=True,
-        writer=None,
-        write_through=False,
-        retries=3,
-        # We accept additional kwargs that will be passed through the writer.push() methods
-        **kwargs,
-        ):
+    data,
+    cluster,
+    table=None,
+    *,
+    dtype=None,
+    index=None,
+    _async=False,
+    fast=False,
+    truncate=False,
+    deduplicate=False,
+    deduplication_mode="drop",
+    infer_types=True,
+    writer=None,
+    write_through=False,
+    retries=3,
+    # We accept additional kwargs that will be passed through the writer.push() methods
+    **kwargs,
+):
     """
     Write multiple aligned numpy arrays to a table.

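Note: in the reflowed signature everything after `table` is keyword-only, and `data` may be a list of (table, data) pairs for multi-table pushes. A hypothetical sketch (the cluster URI and table names are placeholders):

    import numpy as np
    import numpy.ma as ma
    import quasardb
    import quasardb.numpy as qdbnp

    index = np.array(["2024-01-01", "2024-01-02"], dtype="datetime64[ns]")
    data_a = {"close": ma.masked_array([1.0, 2.0])}
    data_b = {"close": ma.masked_array([3.0, 4.0])}

    with quasardb.Cluster("qdb://127.0.0.1:2836") as cluster:
        qdbnp.write_arrays(
            [("table_a", data_a), ("table_b", data_b)],
            cluster,
            index=index,
            fast=True,  # maps to WriterPushMode.Fast, see the push_mode hunk below
            retries=3,
        )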
@@ -691,22 +747,23 @@ def write_arrays(

     if table:
         logger.debug("table explicitly provided, assuming single-table write")
-        return write_arrays([(table, data)],
-                            cluster,
-                            table=None,
-                            dtype=dtype,
-                            index=index,
-                            _async=_async,
-                            fast=fast,
-                            truncate=truncate,
-                            deduplicate=deduplicate,
-                            deduplication_mode=deduplication_mode,
-                            infer_types=infer_types,
-                            write_through=write_through,
-                            writer=writer,
-                            retries=retries,
-                            **kwargs)
+        return write_arrays(
+            [(table, data)],
+            cluster,
+            table=None,
+            dtype=dtype,
+            index=index,
+            _async=_async,
+            fast=fast,
+            truncate=truncate,
+            deduplicate=deduplicate,
+            deduplication_mode=deduplication_mode,
+            infer_types=infer_types,
+            write_through=write_through,
+            writer=writer,
+            retries=retries,
+            **kwargs,
+        )

     ret = []

@@ -718,7 +775,7 @@ def write_arrays(

     push_data = quasardb.WriterData()

-    for (table, data_) in data:
+    for table, data_ in data:
         # Acquire reference to table if string is provided
         if isinstance(table, str):
             table = table_cache.lookup(table, cluster)
@@ -729,10 +786,9 @@ def write_arrays(
         assert type(dtype) is list
         assert len(dtype) is len(cinfos)

-        if index is None and isinstance(data_, dict) and '$timestamp' in data_:
-            index_ = data_.pop('$timestamp')
-
-            assert '$timestamp' not in data_
+        if index is None and isinstance(data_, dict) and "$timestamp" in data_:
+            index_ = data_.pop("$timestamp")
+            assert "$timestamp" not in data_
         elif index is not None:
             index_ = index
         else:
@@ -751,7 +807,6 @@ def write_arrays(
         data_ = ensure_ma(data_, dtype=dtype)
         data_ = _coerce_data(data_, dtype)

-
         # Just some additional friendly information about incorrect dtypes, we'd
         # prefer to have this information thrown from Python instead of native
         # code as it generally makes for somewhat better error context.
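Note: per the hunk above, when no explicit index is passed and a table's data is a dict, a "$timestamp" key is popped off and used as the index. A standalone sketch of that convention:

    import numpy as np
    import numpy.ma as ma

    # Hypothetical payload: the "$timestamp" key doubles as the index.
    data_ = {
        "$timestamp": np.array(["2024-01-01", "2024-01-02"], dtype="datetime64[ns]"),
        "close": ma.masked_array([101.2, 102.5]),
    }

    # Mirrors what write_arrays does internally for dict payloads.
    index_ = data_.pop("$timestamp")
    assert "$timestamp" not in data_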
@@ -778,37 +833,36 @@ def write_arrays(
     # The initial use case was that so we can add additional parameters for test mocks, e.g. `mock_failures` so that
     # we can validate the retry functionality.
     push_kwargs = kwargs
-    push_kwargs['deduplicate'] = deduplicate
-    push_kwargs['deduplication_mode'] = deduplication_mode
-    push_kwargs['write_through'] = write_through
-    push_kwargs['retries'] = retries
+    push_kwargs["deduplicate"] = deduplicate
+    push_kwargs["deduplication_mode"] = deduplication_mode
+    push_kwargs["write_through"] = write_through
+    push_kwargs["retries"] = retries

     logger.debug("pushing %d rows", n_rows)
     start = time.time()

     if fast is True:
-        push_kwargs['push_mode'] = quasardb.WriterPushMode.Fast
+        push_kwargs["push_mode"] = quasardb.WriterPushMode.Fast
     elif truncate is True:
-        push_kwargs['push_mode'] = quasardb.WriterPushMode.Truncate
+        push_kwargs["push_mode"] = quasardb.WriterPushMode.Truncate
     elif isinstance(truncate, tuple):
-        push_kwargs['push_mode'] = quasardb.WriterPushMode.Truncate
-        push_kwargs['range'] = truncate
+        push_kwargs["push_mode"] = quasardb.WriterPushMode.Truncate
+        push_kwargs["range"] = truncate
     elif _async is True:
-        push_kwargs['push_mode'] = quasardb.WriterPushMode.Async
+        push_kwargs["push_mode"] = quasardb.WriterPushMode.Async
     else:
-        push_kwargs['push_mode'] = quasardb.WriterPushMode.Transactional
+        push_kwargs["push_mode"] = quasardb.WriterPushMode.Transactional

     writer.push(push_data, **push_kwargs)

-    logger.debug("pushed %d rows in %s seconds",
-                 n_rows, (time.time() - start))
+    logger.debug("pushed %d rows in %s seconds", n_rows, (time.time() - start))

     return ret


 def _xform_query_results(xs, index, dict):
     if len(xs) == 0:
-        return (np.array([], np.dtype('datetime64[ns]')), np.array([]))
+        return (np.array([], np.dtype("datetime64[ns]")), np.array([]))

     n = None
     for x in xs:
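Note: the boolean flags collapse into a single push_mode with the precedence fast > truncate > _async > transactional, a tuple-valued truncate additionally carrying a time range. A small sketch mirroring that dispatch (assumes the quasardb module is importable):

    import quasardb

    def resolve_push_mode(fast=False, truncate=False, _async=False):
        # Same precedence as the hunk above: fast wins, then truncate
        # (tuples also set a range), then async, else transactional.
        if fast is True:
            return quasardb.WriterPushMode.Fast
        if truncate is True or isinstance(truncate, tuple):
            return quasardb.WriterPushMode.Truncate
        if _async is True:
            return quasardb.WriterPushMode.Async
        return quasardb.WriterPushMode.Transactional

    assert resolve_push_mode(fast=True, _async=True) == quasardb.WriterPushMode.Fast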
@@ -822,8 +876,8 @@ def _xform_query_results(xs, index, dict):
     if index is None:
         # Generate a range, put it in the front of the result list,
         # recurse and tell the function to use that index.
-        xs_ = [('$index', np.arange(n))] + xs
-        return _xform_query_results(xs_, '$index', dict)
+        xs_ = [("$index", np.arange(n))] + xs
+        return _xform_query_results(xs_, "$index", dict)

     if isinstance(index, str):
         for i in range(len(xs)):
@@ -833,10 +887,18 @@ def _xform_query_results(xs, index, dict):
                 # recurse with that offset
                 return _xform_query_results(xs, i, dict)

-        raise KeyError("Unable to resolve index column: column not found in results: {}".format(index))
+        raise KeyError(
+            "Unable to resolve index column: column not found in results: {}".format(
+                index
+            )
+        )

     if not isinstance(index, int):
-        raise TypeError("Unable to resolve index column: unrecognized type {}: {}".format(type(index), index))
+        raise TypeError(
+            "Unable to resolve index column: unrecognized type {}: {}".format(
+                type(index), index
+            )
+        )

     idx = xs[index][1]
     del xs[index]
@@ -845,7 +907,9 @@ def _xform_query_results(xs, index, dict):
     # masked items: we cannot not have an index for a certain row.
     if ma.isMA(idx):
         if ma.count_masked(idx) > 0:
-            raise ValueError("Invalid index: null values detected. An index is never allowed to have null values.")
+            raise ValueError(
+                "Invalid index: null values detected. An index is never allowed to have null values."
+            )

         assert isinstance(idx.data, np.ndarray)
         idx = idx.data
@@ -860,10 +924,7 @@ def _xform_query_results(xs, index, dict):
     return (idx, xs_)


-def query(cluster,
-          query,
-          index=None,
-          dict=False):
+def query(cluster, query, index=None, dict=False):
     """
     Execute a query and return the results as numpy arrays. The shape of the return value
     is always: