quasardb 3.14.2.dev4__cp311-cp311-macosx_11_0_arm64.whl → 3.14.2.dev6__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of quasardb might be problematic. Click here for more details.
- quasardb/CMakeFiles/CMakeDirectoryInformation.cmake +2 -2
- quasardb/Makefile +20 -20
- quasardb/__init__.py +21 -7
- quasardb/cmake_install.cmake +5 -5
- quasardb/date/CMakeFiles/CMakeDirectoryInformation.cmake +2 -2
- quasardb/date/CMakeFiles/Export/a52b05f964b070ee926bcad51d3288af/dateTargets.cmake +1 -1
- quasardb/date/Makefile +20 -20
- quasardb/date/cmake_install.cmake +5 -5
- quasardb/date/dateTargets.cmake +1 -1
- quasardb/extensions/writer.py +59 -61
- quasardb/firehose.py +24 -22
- quasardb/libqdb_api.dylib +0 -0
- quasardb/numpy/__init__.py +262 -128
- quasardb/pandas/__init__.py +145 -91
- quasardb/pool.py +13 -2
- quasardb/pybind11/CMakeFiles/CMakeDirectoryInformation.cmake +2 -2
- quasardb/pybind11/Makefile +20 -20
- quasardb/pybind11/cmake_install.cmake +2 -2
- quasardb/quasardb.cpython-311-darwin.so +0 -0
- quasardb/range-v3/CMakeFiles/CMakeDirectoryInformation.cmake +2 -2
- quasardb/range-v3/CMakeFiles/Export/d94ef200eca10a819b5858b33e808f5b/range-v3-targets.cmake +1 -1
- quasardb/range-v3/CMakeFiles/range.v3.headers.dir/build.make +17 -17
- quasardb/range-v3/Makefile +25 -25
- quasardb/range-v3/cmake_install.cmake +8 -8
- quasardb/range-v3/range-v3-config.cmake +1 -1
- quasardb/stats.py +245 -120
- quasardb/table_cache.py +5 -1
- {quasardb-3.14.2.dev4.dist-info → quasardb-3.14.2.dev6.dist-info}/METADATA +3 -2
- quasardb-3.14.2.dev6.dist-info/RECORD +45 -0
- {quasardb-3.14.2.dev4.dist-info → quasardb-3.14.2.dev6.dist-info}/WHEEL +1 -1
- quasardb-3.14.2.dev4.dist-info/RECORD +0 -45
- {quasardb-3.14.2.dev4.dist-info → quasardb-3.14.2.dev6.dist-info/licenses}/LICENSE.md +0 -0
- {quasardb-3.14.2.dev4.dist-info → quasardb-3.14.2.dev6.dist-info}/top_level.txt +0 -0
quasardb/extensions/writer.py
CHANGED
|
@@ -5,9 +5,10 @@ import numpy.ma as ma
|
|
|
5
5
|
|
|
6
6
|
__all__ = []
|
|
7
7
|
|
|
8
|
+
|
|
8
9
|
def _ensure_ctype(self, idx, ctype):
|
|
9
|
-
assert
|
|
10
|
-
infos = self._legacy_state[
|
|
10
|
+
assert "table" in self._legacy_state
|
|
11
|
+
infos = self._legacy_state["table"].list_columns()
|
|
11
12
|
cinfo = infos[idx]
|
|
12
13
|
|
|
13
14
|
ctype_data = copy.copy(ctype)
|
|
@@ -24,80 +25,81 @@ def _ensure_ctype(self, idx, ctype):
|
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
def _legacy_next_row(self, table):
|
|
27
|
-
if
|
|
28
|
-
self._legacy_state[
|
|
28
|
+
if "pending" not in self._legacy_state:
|
|
29
|
+
self._legacy_state["pending"] = []
|
|
29
30
|
|
|
30
|
-
if
|
|
31
|
-
self._legacy_state[
|
|
31
|
+
if "table" not in self._legacy_state:
|
|
32
|
+
self._legacy_state["table"] = table
|
|
32
33
|
|
|
33
|
-
self._legacy_state[
|
|
34
|
+
self._legacy_state["pending"].append({"by_index": {}})
|
|
34
35
|
|
|
35
36
|
# Return reference to the row inside the buffer
|
|
36
|
-
return self._legacy_state[
|
|
37
|
+
return self._legacy_state["pending"][-1]
|
|
37
38
|
|
|
38
39
|
|
|
39
40
|
def _legacy_current_row(self):
|
|
40
|
-
return self._legacy_state[
|
|
41
|
+
return self._legacy_state["pending"][-1]
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
def _legacy_start_row(self, table, x):
|
|
44
45
|
row = _legacy_next_row(self, table)
|
|
45
|
-
assert
|
|
46
|
-
row[
|
|
46
|
+
assert "$timestamp" not in row
|
|
47
|
+
row["$timestamp"] = x
|
|
47
48
|
|
|
48
49
|
|
|
49
50
|
def _legacy_set_double(self, idx, x):
|
|
50
51
|
_ensure_ctype(self, idx, quasardb.ColumnType.Double)
|
|
51
52
|
assert isinstance(x, float)
|
|
52
|
-
assert idx not in _legacy_current_row(self)[
|
|
53
|
-
_legacy_current_row(self)[
|
|
53
|
+
assert idx not in _legacy_current_row(self)["by_index"]
|
|
54
|
+
_legacy_current_row(self)["by_index"][idx] = x
|
|
54
55
|
|
|
55
56
|
|
|
56
57
|
def _legacy_set_int64(self, idx, x):
|
|
57
58
|
_ensure_ctype(self, idx, quasardb.ColumnType.Int64)
|
|
58
59
|
assert isinstance(x, int)
|
|
59
|
-
assert idx not in _legacy_current_row(self)[
|
|
60
|
-
_legacy_current_row(self)[
|
|
60
|
+
assert idx not in _legacy_current_row(self)["by_index"]
|
|
61
|
+
_legacy_current_row(self)["by_index"][idx] = x
|
|
61
62
|
|
|
62
63
|
|
|
63
64
|
def _legacy_set_timestamp(self, idx, x):
|
|
64
65
|
_ensure_ctype(self, idx, quasardb.ColumnType.Timestamp)
|
|
65
|
-
assert idx not in _legacy_current_row(self)[
|
|
66
|
-
_legacy_current_row(self)[
|
|
66
|
+
assert idx not in _legacy_current_row(self)["by_index"]
|
|
67
|
+
_legacy_current_row(self)["by_index"][idx] = x
|
|
67
68
|
|
|
68
69
|
|
|
69
70
|
def _legacy_set_string(self, idx, x):
|
|
70
71
|
_ensure_ctype(self, idx, quasardb.ColumnType.String)
|
|
71
72
|
assert isinstance(x, str)
|
|
72
|
-
assert idx not in _legacy_current_row(self)[
|
|
73
|
+
assert idx not in _legacy_current_row(self)["by_index"]
|
|
73
74
|
|
|
74
|
-
_legacy_current_row(self)[
|
|
75
|
+
_legacy_current_row(self)["by_index"][idx] = x
|
|
75
76
|
|
|
76
77
|
|
|
77
78
|
def _legacy_set_blob(self, idx, x):
|
|
78
79
|
_ensure_ctype(self, idx, quasardb.ColumnType.Blob)
|
|
79
80
|
assert isinstance(x, bytes)
|
|
80
|
-
assert idx not in _legacy_current_row(self)[
|
|
81
|
+
assert idx not in _legacy_current_row(self)["by_index"]
|
|
81
82
|
|
|
82
|
-
_legacy_current_row(self)[
|
|
83
|
+
_legacy_current_row(self)["by_index"][idx] = x
|
|
83
84
|
|
|
84
85
|
|
|
85
86
|
def _legacy_push(self):
|
|
86
|
-
if
|
|
87
|
+
if "pending" not in self._legacy_state:
|
|
87
88
|
# Extremely likely default case, no "old" rows
|
|
88
89
|
return
|
|
89
90
|
|
|
90
|
-
assert
|
|
91
|
-
table = self._legacy_state[
|
|
91
|
+
assert "table" in self._legacy_state
|
|
92
|
+
table = self._legacy_state["table"]
|
|
92
93
|
|
|
93
94
|
# Some useful constants
|
|
94
|
-
dtype_by_ctype = {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
95
|
+
dtype_by_ctype = {
|
|
96
|
+
quasardb.ColumnType.Double: np.dtype("float64"),
|
|
97
|
+
quasardb.ColumnType.Int64: np.dtype("int64"),
|
|
98
|
+
quasardb.ColumnType.Timestamp: np.dtype("datetime64[ns]"),
|
|
99
|
+
quasardb.ColumnType.String: np.dtype("unicode"),
|
|
100
|
+
quasardb.ColumnType.Symbol: np.dtype("unicode"),
|
|
101
|
+
quasardb.ColumnType.Blob: np.dtype("bytes"),
|
|
102
|
+
}
|
|
101
103
|
|
|
102
104
|
ctype_by_idx = {}
|
|
103
105
|
cinfos = table.list_columns()
|
|
@@ -107,31 +109,30 @@ def _legacy_push(self):
|
|
|
107
109
|
all_idx = set(ctype_by_idx.keys())
|
|
108
110
|
|
|
109
111
|
# Prepare data structure
|
|
110
|
-
pivoted = {
|
|
111
|
-
'by_index': {}}
|
|
112
|
+
pivoted = {"$timestamp": [], "by_index": {}}
|
|
112
113
|
for i in all_idx:
|
|
113
|
-
pivoted[
|
|
114
|
+
pivoted["by_index"][i] = []
|
|
114
115
|
|
|
115
116
|
# Do the actual pivot
|
|
116
|
-
for row in self._legacy_state[
|
|
117
|
-
assert
|
|
118
|
-
assert
|
|
117
|
+
for row in self._legacy_state["pending"]:
|
|
118
|
+
assert "$timestamp" in row
|
|
119
|
+
assert "by_index" in row
|
|
119
120
|
|
|
120
|
-
pivoted[
|
|
121
|
+
pivoted["$timestamp"].append(row["$timestamp"])
|
|
121
122
|
|
|
122
|
-
for idx in pivoted[
|
|
123
|
-
val = row[
|
|
124
|
-
pivoted[
|
|
123
|
+
for idx in pivoted["by_index"].keys():
|
|
124
|
+
val = row["by_index"].get(idx, None)
|
|
125
|
+
pivoted["by_index"][idx].append(val)
|
|
125
126
|
|
|
126
127
|
# Validation / verification, not strictly necessary. Effectively
|
|
127
128
|
# ensures that we have the exact same amount of values for every
|
|
128
129
|
# column
|
|
129
|
-
for xs in pivoted[
|
|
130
|
-
assert len(xs) == len(pivoted[
|
|
130
|
+
for xs in pivoted["by_index"].values():
|
|
131
|
+
assert len(xs) == len(pivoted["$timestamp"])
|
|
131
132
|
|
|
132
133
|
column_data = []
|
|
133
134
|
|
|
134
|
-
for idx,xs in pivoted[
|
|
135
|
+
for idx, xs in pivoted["by_index"].items():
|
|
135
136
|
ctype = ctype_by_idx[idx]
|
|
136
137
|
dtype = dtype_by_ctype[ctype]
|
|
137
138
|
|
|
@@ -141,22 +142,19 @@ def _legacy_push(self):
|
|
|
141
142
|
|
|
142
143
|
xs_ = []
|
|
143
144
|
if all(mask):
|
|
144
|
-
xs_ = ma.masked_all(len(xs),
|
|
145
|
-
dtype=dtype)
|
|
145
|
+
xs_ = ma.masked_all(len(xs), dtype=dtype)
|
|
146
146
|
else:
|
|
147
147
|
xs_ = ma.masked_array(data=np.array(xs, dtype), mask=mask)
|
|
148
148
|
|
|
149
|
-
assert len(xs_) == len(pivoted[
|
|
149
|
+
assert len(xs_) == len(pivoted["$timestamp"])
|
|
150
150
|
|
|
151
151
|
column_data.append(xs_)
|
|
152
152
|
|
|
153
|
-
|
|
154
153
|
push_data = quasardb.WriterData()
|
|
155
|
-
index = np.array(pivoted[
|
|
154
|
+
index = np.array(pivoted["$timestamp"], np.dtype("datetime64[ns]"))
|
|
156
155
|
|
|
157
156
|
push_data.append(table, index, column_data)
|
|
158
157
|
|
|
159
|
-
|
|
160
158
|
self._legacy_state = {}
|
|
161
159
|
return push_data
|
|
162
160
|
|
|
@@ -180,14 +178,14 @@ def extend_writer(x):
|
|
|
180
178
|
in C++ with few benefits.
|
|
181
179
|
"""
|
|
182
180
|
|
|
183
|
-
x.start_row
|
|
184
|
-
x.set_double
|
|
185
|
-
x.set_int64
|
|
186
|
-
x.set_string
|
|
187
|
-
x.set_blob
|
|
188
|
-
x.set_timestamp
|
|
189
|
-
|
|
190
|
-
x.push
|
|
191
|
-
x.push_fast
|
|
192
|
-
x.push_async
|
|
193
|
-
x.push_truncate
|
|
181
|
+
x.start_row = _legacy_start_row
|
|
182
|
+
x.set_double = _legacy_set_double
|
|
183
|
+
x.set_int64 = _legacy_set_int64
|
|
184
|
+
x.set_string = _legacy_set_string
|
|
185
|
+
x.set_blob = _legacy_set_blob
|
|
186
|
+
x.set_timestamp = _legacy_set_timestamp
|
|
187
|
+
|
|
188
|
+
x.push = _wrap_fn(x.push, _legacy_push)
|
|
189
|
+
x.push_fast = _wrap_fn(x.push_fast, _legacy_push)
|
|
190
|
+
x.push_async = _wrap_fn(x.push_async, _legacy_push)
|
|
191
|
+
x.push_truncate = _wrap_fn(x.push_truncate, _legacy_push)
|
quasardb/firehose.py
CHANGED
|
@@ -6,15 +6,14 @@ import numpy as np
|
|
|
6
6
|
FIREHOSE_TABLE = "$qdb.firehose"
|
|
7
7
|
POLL_INTERVAL = 0.1
|
|
8
8
|
|
|
9
|
-
logger = logging.getLogger(
|
|
9
|
+
logger = logging.getLogger("quasardb.firehose")
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def _init():
|
|
13
13
|
"""
|
|
14
14
|
Initialize our internal state.
|
|
15
15
|
"""
|
|
16
|
-
return {
|
|
17
|
-
'seen': set()}
|
|
16
|
+
return {"last": None, "seen": set()}
|
|
18
17
|
|
|
19
18
|
|
|
20
19
|
def _get_transactions_since(conn, table_name, last):
|
|
@@ -24,10 +23,12 @@ def _get_transactions_since(conn, table_name, last):
|
|
|
24
23
|
"""
|
|
25
24
|
if last is None:
|
|
26
25
|
q = "SELECT $timestamp, transaction_id, begin, end FROM \"{}\" WHERE table = '{}' ORDER BY $timestamp".format(
|
|
27
|
-
FIREHOSE_TABLE, table_name
|
|
26
|
+
FIREHOSE_TABLE, table_name
|
|
27
|
+
)
|
|
28
28
|
else:
|
|
29
29
|
q = "SELECT $timestamp, transaction_id, begin, end FROM \"{}\" IN RANGE ({}, +1y) WHERE table = '{}' ORDER BY $timestamp".format(
|
|
30
|
-
FIREHOSE_TABLE, last[
|
|
30
|
+
FIREHOSE_TABLE, last["$timestamp"], table_name
|
|
31
|
+
)
|
|
31
32
|
|
|
32
33
|
return conn.query(q)
|
|
33
34
|
|
|
@@ -36,8 +37,7 @@ def _get_transaction_data(conn, table_name, begin, end):
|
|
|
36
37
|
"""
|
|
37
38
|
Gets all data from a certain table.
|
|
38
39
|
"""
|
|
39
|
-
q =
|
|
40
|
-
table_name, begin, end)
|
|
40
|
+
q = 'SELECT * FROM "{}" IN RANGE ({}, {}) '.format(table_name, begin, end)
|
|
41
41
|
return conn.query(q)
|
|
42
42
|
|
|
43
43
|
|
|
@@ -50,33 +50,35 @@ def _get_next(conn, table_name, state):
|
|
|
50
50
|
# 3. For each of the transactions, pull in all data
|
|
51
51
|
# 4. Concatenate all this data (in order of quasardb transaction)
|
|
52
52
|
|
|
53
|
-
txs = _get_transactions_since(conn, table_name, state[
|
|
53
|
+
txs = _get_transactions_since(conn, table_name, state["last"])
|
|
54
54
|
|
|
55
55
|
xs = list()
|
|
56
56
|
for tx in txs:
|
|
57
|
-
txid = tx[
|
|
57
|
+
txid = tx["transaction_id"]
|
|
58
58
|
|
|
59
|
-
if state[
|
|
59
|
+
if state["last"] is not None and tx["$timestamp"] > state["last"]["$timestamp"]:
|
|
60
60
|
# At this point we are guaranteed that the transaction we encounter is
|
|
61
61
|
# 'new', will not conflict with any other transaction ids. It is thus
|
|
62
62
|
# safe to reset the txid set.
|
|
63
|
-
state[
|
|
64
|
-
|
|
65
|
-
if txid not in state[
|
|
66
|
-
xs = xs + _get_transaction_data(
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
63
|
+
state["seen"] = set()
|
|
64
|
+
|
|
65
|
+
if txid not in state["seen"]:
|
|
66
|
+
xs = xs + _get_transaction_data(
|
|
67
|
+
conn,
|
|
68
|
+
table_name,
|
|
69
|
+
tx["begin"],
|
|
70
|
+
# The firehose logs transaction `end` span as
|
|
71
|
+
# end inclusive, while our bulk reader and/or query
|
|
72
|
+
# language are end exclusive.
|
|
73
|
+
tx["end"] + np.timedelta64(1, "ns"),
|
|
74
|
+
)
|
|
73
75
|
|
|
74
76
|
# Because it is possible that multiple firehose changes are stored with the
|
|
75
77
|
# exact same $timestamp, we also keep track of the actually seen
|
|
76
78
|
# transaction ids.
|
|
77
|
-
state[
|
|
79
|
+
state["seen"].add(txid)
|
|
78
80
|
|
|
79
|
-
state[
|
|
81
|
+
state["last"] = tx
|
|
80
82
|
|
|
81
83
|
return (state, xs)
|
|
82
84
|
|
quasardb/libqdb_api.dylib
CHANGED
|
Binary file
|