quasardb-3.14.2.dev4-cp313-cp313-win_amd64.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
Potentially problematic release: this version of quasardb might be problematic.
- quasardb/CMakeFiles/generate.stamp +1 -0
- quasardb/CMakeFiles/generate.stamp.depend +2 -0
- quasardb/INSTALL.vcxproj +209 -0
- quasardb/INSTALL.vcxproj.filters +13 -0
- quasardb/__init__.py +123 -0
- quasardb/cmake_install.cmake +48 -0
- quasardb/date/ALL_BUILD.vcxproj +181 -0
- quasardb/date/ALL_BUILD.vcxproj.filters +8 -0
- quasardb/date/CMakeFiles/Export/df49adab93b9e0c10c64f72458b31971/dateTargets.cmake +106 -0
- quasardb/date/CMakeFiles/generate.stamp +1 -0
- quasardb/date/CMakeFiles/generate.stamp.depend +6 -0
- quasardb/date/INSTALL.vcxproj +209 -0
- quasardb/date/INSTALL.vcxproj.filters +13 -0
- quasardb/date/cmake_install.cmake +71 -0
- quasardb/date/date.sln +60 -0
- quasardb/date/dateConfigVersion.cmake +65 -0
- quasardb/date/dateTargets.cmake +63 -0
- quasardb/extensions/__init__.py +8 -0
- quasardb/extensions/writer.py +193 -0
- quasardb/firehose.py +101 -0
- quasardb/numpy/__init__.py +901 -0
- quasardb/pandas/__init__.py +447 -0
- quasardb/pool.py +294 -0
- quasardb/pybind11/ALL_BUILD.vcxproj +181 -0
- quasardb/pybind11/ALL_BUILD.vcxproj.filters +8 -0
- quasardb/pybind11/CMakeFiles/generate.stamp +1 -0
- quasardb/pybind11/CMakeFiles/generate.stamp.depend +20 -0
- quasardb/pybind11/INSTALL.vcxproj +209 -0
- quasardb/pybind11/INSTALL.vcxproj.filters +13 -0
- quasardb/pybind11/cmake_install.cmake +40 -0
- quasardb/pybind11/pybind11.sln +60 -0
- quasardb/qdb_api.dll +0 -0
- quasardb/quasardb.cp313-win_amd64.pyd +0 -0
- quasardb/range-v3/ALL_BUILD.vcxproj +181 -0
- quasardb/range-v3/ALL_BUILD.vcxproj.filters +8 -0
- quasardb/range-v3/CMakeFiles/Export/d94ef200eca10a819b5858b33e808f5b/range-v3-targets.cmake +128 -0
- quasardb/range-v3/CMakeFiles/generate.stamp +1 -0
- quasardb/range-v3/CMakeFiles/generate.stamp.depend +18 -0
- quasardb/range-v3/INSTALL.vcxproj +209 -0
- quasardb/range-v3/INSTALL.vcxproj.filters +13 -0
- quasardb/range-v3/Range-v3.sln +72 -0
- quasardb/range-v3/cmake_install.cmake +107 -0
- quasardb/range-v3/include/range/v3/version.hpp +24 -0
- quasardb/range-v3/range-v3-config-version.cmake +83 -0
- quasardb/range-v3/range-v3-config.cmake +80 -0
- quasardb/range-v3/range.v3.headers.vcxproj +804 -0
- quasardb/range-v3/range.v3.headers.vcxproj.filters +952 -0
- quasardb/stats.py +233 -0
- quasardb/table_cache.py +52 -0
- quasardb-3.14.2.dev4.dist-info/LICENSE.md +11 -0
- quasardb-3.14.2.dev4.dist-info/METADATA +40 -0
- quasardb-3.14.2.dev4.dist-info/RECORD +54 -0
- quasardb-3.14.2.dev4.dist-info/WHEEL +5 -0
- quasardb-3.14.2.dev4.dist-info/top_level.txt +1 -0
quasardb/extensions/writer.py
ADDED

@@ -0,0 +1,193 @@
import copy
import quasardb
import numpy as np
import numpy.ma as ma

__all__ = []


# Validate that the value's column type matches the table's column at `idx`;
# Symbol columns are treated as String for comparison purposes.
def _ensure_ctype(self, idx, ctype):
    assert 'table' in self._legacy_state
    infos = self._legacy_state['table'].list_columns()
    cinfo = infos[idx]

    ctype_data = copy.copy(ctype)
    ctype_column = copy.copy(cinfo.type)

    if ctype_data == quasardb.ColumnType.Symbol:
        ctype_data = quasardb.ColumnType.String

    if ctype_column == quasardb.ColumnType.Symbol:
        ctype_column = quasardb.ColumnType.String

    if ctype_data != ctype_column:
        raise quasardb.IncompatibleTypeError()


def _legacy_next_row(self, table):
    if 'pending' not in self._legacy_state:
        self._legacy_state['pending'] = []

    if 'table' not in self._legacy_state:
        self._legacy_state['table'] = table

    self._legacy_state['pending'].append({'by_index': {}})

    # Return reference to the row inside the buffer
    return self._legacy_state['pending'][-1]


def _legacy_current_row(self):
    return self._legacy_state['pending'][-1]


def _legacy_start_row(self, table, x):
    row = _legacy_next_row(self, table)
    assert '$timestamp' not in row
    row['$timestamp'] = x


def _legacy_set_double(self, idx, x):
    _ensure_ctype(self, idx, quasardb.ColumnType.Double)
    assert isinstance(x, float)
    assert idx not in _legacy_current_row(self)['by_index']
    _legacy_current_row(self)['by_index'][idx] = x


def _legacy_set_int64(self, idx, x):
    _ensure_ctype(self, idx, quasardb.ColumnType.Int64)
    assert isinstance(x, int)
    assert idx not in _legacy_current_row(self)['by_index']
    _legacy_current_row(self)['by_index'][idx] = x


def _legacy_set_timestamp(self, idx, x):
    _ensure_ctype(self, idx, quasardb.ColumnType.Timestamp)
    assert idx not in _legacy_current_row(self)['by_index']
    _legacy_current_row(self)['by_index'][idx] = x


def _legacy_set_string(self, idx, x):
    _ensure_ctype(self, idx, quasardb.ColumnType.String)
    assert isinstance(x, str)
    assert idx not in _legacy_current_row(self)['by_index']

    _legacy_current_row(self)['by_index'][idx] = x


def _legacy_set_blob(self, idx, x):
    _ensure_ctype(self, idx, quasardb.ColumnType.Blob)
    assert isinstance(x, bytes)
    assert idx not in _legacy_current_row(self)['by_index']

    _legacy_current_row(self)['by_index'][idx] = x


# Pivot all buffered rows into column-oriented masked arrays and wrap them
# in a WriterData ready to be handed to the native push functions.
def _legacy_push(self):
    if 'pending' not in self._legacy_state:
        # Extremely likely default case, no "old" rows
        return

    assert 'table' in self._legacy_state
    table = self._legacy_state['table']

    # Some useful constants
    dtype_by_ctype = {quasardb.ColumnType.Double: np.dtype('float64'),
                      quasardb.ColumnType.Int64: np.dtype('int64'),
                      quasardb.ColumnType.Timestamp: np.dtype('datetime64[ns]'),
                      quasardb.ColumnType.String: np.dtype('unicode'),
                      quasardb.ColumnType.Symbol: np.dtype('unicode'),
                      quasardb.ColumnType.Blob: np.dtype('bytes')}

    ctype_by_idx = {}
    cinfos = table.list_columns()
    for i in range(len(cinfos)):
        ctype_by_idx[i] = cinfos[i].type

    all_idx = set(ctype_by_idx.keys())

    # Prepare data structure
    pivoted = {'$timestamp': [],
               'by_index': {}}
    for i in all_idx:
        pivoted['by_index'][i] = []

    # Do the actual pivot
    for row in self._legacy_state['pending']:
        assert '$timestamp' in row
        assert 'by_index' in row

        pivoted['$timestamp'].append(row['$timestamp'])

        for idx in pivoted['by_index'].keys():
            val = row['by_index'].get(idx, None)
            pivoted['by_index'][idx].append(val)

    # Validation / verification, not strictly necessary. Effectively
    # ensures that we have the exact same amount of values for every
    # column
    for xs in pivoted['by_index'].values():
        assert len(xs) == len(pivoted['$timestamp'])

    column_data = []

    for idx, xs in pivoted['by_index'].items():
        ctype = ctype_by_idx[idx]
        dtype = dtype_by_ctype[ctype]

        # None-mask works, because everything inside the list are just regular objects
        mask = [x is None for x in xs]

        xs_ = []
        if all(mask):
            xs_ = ma.masked_all(len(xs),
                                dtype=dtype)
        else:
            xs_ = ma.masked_array(data=np.array(xs, dtype), mask=mask)

        assert len(xs_) == len(pivoted['$timestamp'])

        column_data.append(xs_)

    push_data = quasardb.WriterData()
    index = np.array(pivoted['$timestamp'], np.dtype('datetime64[ns]'))

    push_data.append(table, index, column_data)

    self._legacy_state = {}
    return push_data


# Wrap a native push function so that any rows buffered through the legacy
# API are first pivoted (via `replace_fn`) and passed along as data.
def _wrap_fn(old_fn, replace_fn):

    def wrapped(self, *args, **kwargs):
        data = replace_fn(self)
        if data:
            return old_fn(self, data, *args, **kwargs)
        else:
            return old_fn(self, *args, **kwargs)

    return wrapped


def extend_writer(x):
    """
    Extends the writer with the "old" batch inserter API. This is purely a
    backwards compatibility layer; we want to avoid having to maintain it
    in C++ for few benefits.
    """

    x.start_row = _legacy_start_row
    x.set_double = _legacy_set_double
    x.set_int64 = _legacy_set_int64
    x.set_string = _legacy_set_string
    x.set_blob = _legacy_set_blob
    x.set_timestamp = _legacy_set_timestamp

    x.push = _wrap_fn(x.push, _legacy_push)
    x.push_fast = _wrap_fn(x.push_fast, _legacy_push)
    x.push_async = _wrap_fn(x.push_async, _legacy_push)
    x.push_truncate = _wrap_fn(x.push_truncate, _legacy_push)
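For reference, a minimal sketch of how the restored legacy API is typically driven. The cluster URI, table name, column layout, and the `conn.writer()` accessor are illustrative assumptions, not part of this diff; `extend_writer()` is applied to the writer class by the package itself.

    import numpy as np
    import quasardb

    conn = quasardb.Cluster("qdb://127.0.0.1:2836")   # hypothetical endpoint
    table = conn.table("example")                     # hypothetical table with a Double column at index 0

    writer = conn.writer()                            # assumed accessor for a writer instance
    writer.start_row(table, np.datetime64('2024-01-01T00:00:00', 'ns'))
    writer.set_double(0, 1.23)                        # wrong column type would raise IncompatibleTypeError
    writer.push()                                     # _legacy_push pivots buffered rows into masked column arrays

Note that the setters only buffer values in `_legacy_state`; no data reaches the cluster until one of the wrapped push functions runs `_legacy_push` and hands the pivoted `WriterData` to the native implementation.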
quasardb/firehose.py
ADDED
@@ -0,0 +1,101 @@
import time
import quasardb
import logging
import numpy as np

FIREHOSE_TABLE = "$qdb.firehose"
POLL_INTERVAL = 0.1

logger = logging.getLogger('quasardb.firehose')


def _init():
    """
    Initialize our internal state.
    """
    return {'last': None,
            'seen': set()}


def _get_transactions_since(conn, table_name, last):
    """
    Retrieve all transactions since a certain timestamp. `last` is expected to
    be a firehose row (a dict) with at least a $timestamp attached.
    """
    if last is None:
        q = "SELECT $timestamp, transaction_id, begin, end FROM \"{}\" WHERE table = '{}' ORDER BY $timestamp".format(
            FIREHOSE_TABLE, table_name)
    else:
        q = "SELECT $timestamp, transaction_id, begin, end FROM \"{}\" IN RANGE ({}, +1y) WHERE table = '{}' ORDER BY $timestamp".format(
            FIREHOSE_TABLE, last['$timestamp'], table_name)

    return conn.query(q)


def _get_transaction_data(conn, table_name, begin, end):
    """
    Gets all data from a certain table within the given time range.
    """
    q = "SELECT * FROM \"{}\" IN RANGE ({}, {})".format(
        table_name, begin, end)
    return conn.query(q)


def _get_next(conn, table_name, state):

    # Our flow to retrieve new data is as follows:
    # 1. Based on the state's last processed transaction, retrieve all
    #    transactions that have been logged into the firehose since then.
    # 2. For each of the transactions, verify we haven't seen it before.
    # 3. For each of the transactions, pull in all data.
    # 4. Concatenate all this data (in order of quasardb transaction).

    txs = _get_transactions_since(conn, table_name, state['last'])

    xs = list()
    for tx in txs:
        txid = tx['transaction_id']

        if state['last'] is not None and tx['$timestamp'] > state['last']['$timestamp']:
            # At this point we are guaranteed that the transaction we encounter
            # is 'new' and will not conflict with any other transaction ids. It
            # is thus safe to reset the txid set.
            state['seen'] = set()

        if txid not in state['seen']:
            xs = xs + _get_transaction_data(conn,
                                            table_name,
                                            tx['begin'],
                                            # The firehose logs the transaction's `end` span as
                                            # end-inclusive, while our bulk reader and/or query
                                            # language are end-exclusive.
                                            tx['end'] + np.timedelta64(1, 'ns'))

            # Because it is possible that multiple firehose changes are stored
            # with the exact same $timestamp, we also keep track of the
            # transaction ids actually seen.
            state['seen'].add(txid)

        state['last'] = tx

    return (state, xs)


def subscribe(conn, table_name):
    state = _init()

    while True:
        # Note how this is effectively a never-ending fold loop that
        # transforms state into a new state. This state effectively
        # functions as a checkpoint.
        #
        # At a later point, we could choose to provide the user direct
        # access to this 'state' object, so that they can implement e.g.
        # mechanisms to replay from a certain checkpoint.
        (state, xs) = _get_next(conn, table_name, state)

        for x in xs:
            yield x

        # Our poll interval
        time.sleep(POLL_INTERVAL)