quasardb-3.14.2.dev4-cp313-cp313-win_amd64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.

Potentially problematic release: this version of quasardb might be problematic.

Files changed (54)
  1. quasardb/CMakeFiles/generate.stamp +1 -0
  2. quasardb/CMakeFiles/generate.stamp.depend +2 -0
  3. quasardb/INSTALL.vcxproj +209 -0
  4. quasardb/INSTALL.vcxproj.filters +13 -0
  5. quasardb/__init__.py +123 -0
  6. quasardb/cmake_install.cmake +48 -0
  7. quasardb/date/ALL_BUILD.vcxproj +181 -0
  8. quasardb/date/ALL_BUILD.vcxproj.filters +8 -0
  9. quasardb/date/CMakeFiles/Export/df49adab93b9e0c10c64f72458b31971/dateTargets.cmake +106 -0
  10. quasardb/date/CMakeFiles/generate.stamp +1 -0
  11. quasardb/date/CMakeFiles/generate.stamp.depend +6 -0
  12. quasardb/date/INSTALL.vcxproj +209 -0
  13. quasardb/date/INSTALL.vcxproj.filters +13 -0
  14. quasardb/date/cmake_install.cmake +71 -0
  15. quasardb/date/date.sln +60 -0
  16. quasardb/date/dateConfigVersion.cmake +65 -0
  17. quasardb/date/dateTargets.cmake +63 -0
  18. quasardb/extensions/__init__.py +8 -0
  19. quasardb/extensions/writer.py +193 -0
  20. quasardb/firehose.py +101 -0
  21. quasardb/numpy/__init__.py +901 -0
  22. quasardb/pandas/__init__.py +447 -0
  23. quasardb/pool.py +294 -0
  24. quasardb/pybind11/ALL_BUILD.vcxproj +181 -0
  25. quasardb/pybind11/ALL_BUILD.vcxproj.filters +8 -0
  26. quasardb/pybind11/CMakeFiles/generate.stamp +1 -0
  27. quasardb/pybind11/CMakeFiles/generate.stamp.depend +20 -0
  28. quasardb/pybind11/INSTALL.vcxproj +209 -0
  29. quasardb/pybind11/INSTALL.vcxproj.filters +13 -0
  30. quasardb/pybind11/cmake_install.cmake +40 -0
  31. quasardb/pybind11/pybind11.sln +60 -0
  32. quasardb/qdb_api.dll +0 -0
  33. quasardb/quasardb.cp313-win_amd64.pyd +0 -0
  34. quasardb/range-v3/ALL_BUILD.vcxproj +181 -0
  35. quasardb/range-v3/ALL_BUILD.vcxproj.filters +8 -0
  36. quasardb/range-v3/CMakeFiles/Export/d94ef200eca10a819b5858b33e808f5b/range-v3-targets.cmake +128 -0
  37. quasardb/range-v3/CMakeFiles/generate.stamp +1 -0
  38. quasardb/range-v3/CMakeFiles/generate.stamp.depend +18 -0
  39. quasardb/range-v3/INSTALL.vcxproj +209 -0
  40. quasardb/range-v3/INSTALL.vcxproj.filters +13 -0
  41. quasardb/range-v3/Range-v3.sln +72 -0
  42. quasardb/range-v3/cmake_install.cmake +107 -0
  43. quasardb/range-v3/include/range/v3/version.hpp +24 -0
  44. quasardb/range-v3/range-v3-config-version.cmake +83 -0
  45. quasardb/range-v3/range-v3-config.cmake +80 -0
  46. quasardb/range-v3/range.v3.headers.vcxproj +804 -0
  47. quasardb/range-v3/range.v3.headers.vcxproj.filters +952 -0
  48. quasardb/stats.py +233 -0
  49. quasardb/table_cache.py +52 -0
  50. quasardb-3.14.2.dev4.dist-info/LICENSE.md +11 -0
  51. quasardb-3.14.2.dev4.dist-info/METADATA +40 -0
  52. quasardb-3.14.2.dev4.dist-info/RECORD +54 -0
  53. quasardb-3.14.2.dev4.dist-info/WHEEL +5 -0
  54. quasardb-3.14.2.dev4.dist-info/top_level.txt +1 -0
quasardb/extensions/writer.py ADDED
@@ -0,0 +1,193 @@
+ import copy
+ import quasardb
+ import numpy as np
+ import numpy.ma as ma
+
+ __all__ = []
+
+ def _ensure_ctype(self, idx, ctype):
+     assert 'table' in self._legacy_state
+     infos = self._legacy_state['table'].list_columns()
+     cinfo = infos[idx]
+
+     ctype_data = copy.copy(ctype)
+     ctype_column = copy.copy(cinfo.type)
+
+     if ctype_data == quasardb.ColumnType.Symbol:
+         ctype_data = quasardb.ColumnType.String
+
+     if ctype_column == quasardb.ColumnType.Symbol:
+         ctype_column = quasardb.ColumnType.String
+
+     if not ctype_data == ctype_column:
+         raise quasardb.IncompatibleTypeError()
+
+
+ def _legacy_next_row(self, table):
+     if 'pending' not in self._legacy_state:
+         self._legacy_state['pending'] = []
+
+     if 'table' not in self._legacy_state:
+         self._legacy_state['table'] = table
+
+     self._legacy_state['pending'].append({'by_index': {}})
+
+     # Return reference to the row inside the buffer
+     return self._legacy_state['pending'][-1]
+
+
+ def _legacy_current_row(self):
+     return self._legacy_state['pending'][-1]
+
+
+ def _legacy_start_row(self, table, x):
+     row = _legacy_next_row(self, table)
+     assert '$timestamp' not in row
+     row['$timestamp'] = x
+
+
+ def _legacy_set_double(self, idx, x):
+     _ensure_ctype(self, idx, quasardb.ColumnType.Double)
+     assert isinstance(x, float)
+     assert idx not in _legacy_current_row(self)['by_index']
+     _legacy_current_row(self)['by_index'][idx] = x
+
+
+ def _legacy_set_int64(self, idx, x):
+     _ensure_ctype(self, idx, quasardb.ColumnType.Int64)
+     assert isinstance(x, int)
+     assert idx not in _legacy_current_row(self)['by_index']
+     _legacy_current_row(self)['by_index'][idx] = x
+
+
+ def _legacy_set_timestamp(self, idx, x):
+     _ensure_ctype(self, idx, quasardb.ColumnType.Timestamp)
+     assert idx not in _legacy_current_row(self)['by_index']
+     _legacy_current_row(self)['by_index'][idx] = x
+
+
+ def _legacy_set_string(self, idx, x):
+     _ensure_ctype(self, idx, quasardb.ColumnType.String)
+     assert isinstance(x, str)
+     assert idx not in _legacy_current_row(self)['by_index']
+
+     _legacy_current_row(self)['by_index'][idx] = x
+
+
+ def _legacy_set_blob(self, idx, x):
+     _ensure_ctype(self, idx, quasardb.ColumnType.Blob)
+     assert isinstance(x, bytes)
+     assert idx not in _legacy_current_row(self)['by_index']
+
+     _legacy_current_row(self)['by_index'][idx] = x
+
+
+ def _legacy_push(self):
+     if 'pending' not in self._legacy_state:
+         # Extremely likely default case, no "old" rows
+         return
+
+     assert 'table' in self._legacy_state
+     table = self._legacy_state['table']
+
+     # Some useful constants
+     dtype_by_ctype = {quasardb.ColumnType.Double: np.dtype('float64'),
+                       quasardb.ColumnType.Int64: np.dtype('int64'),
+                       quasardb.ColumnType.Timestamp: np.dtype('datetime64[ns]'),
+                       quasardb.ColumnType.String: np.dtype('unicode'),
+                       quasardb.ColumnType.Symbol: np.dtype('unicode'),
+                       quasardb.ColumnType.Blob: np.dtype('bytes')
+                       }
+
+     ctype_by_idx = {}
+     cinfos = table.list_columns()
+     for i in range(len(cinfos)):
+         ctype_by_idx[i] = cinfos[i].type
+
+     all_idx = set(ctype_by_idx.keys())
+
+     # Prepare data structure
+     pivoted = {'$timestamp': [],
+                'by_index': {}}
+     for i in all_idx:
+         pivoted['by_index'][i] = []
+
+     # Do the actual pivot
+     for row in self._legacy_state['pending']:
+         assert '$timestamp' in row
+         assert 'by_index' in row
+
+         pivoted['$timestamp'].append(row['$timestamp'])
+
+         for idx in pivoted['by_index'].keys():
+             val = row['by_index'].get(idx, None)
+             pivoted['by_index'][idx].append(val)
+
+     # Validation / verification, not strictly necessary. Effectively
+     # ensures that we have the exact same amount of values for every
+     # column
+     for xs in pivoted['by_index'].values():
+         assert len(xs) == len(pivoted['$timestamp'])
+
+     column_data = []
+
+     for idx, xs in pivoted['by_index'].items():
+         ctype = ctype_by_idx[idx]
+         dtype = dtype_by_ctype[ctype]
+
+         # None-mask works, because everything inside the list are just regular objects
+
+         mask = [x is None for x in xs]
+
+         xs_ = []
+         if all(mask):
+             xs_ = ma.masked_all(len(xs),
+                                 dtype=dtype)
+         else:
+             xs_ = ma.masked_array(data=np.array(xs, dtype), mask=mask)
+
+         assert len(xs_) == len(pivoted['$timestamp'])
+
+         column_data.append(xs_)
+
+
+     push_data = quasardb.WriterData()
+     index = np.array(pivoted['$timestamp'], np.dtype('datetime64[ns]'))
+
+     push_data.append(table, index, column_data)
+
+
+     self._legacy_state = {}
+     return push_data
+
+
+ def _wrap_fn(old_fn, replace_fn):
+
+     def wrapped(self, *args, **kwargs):
+         data = replace_fn(self)
+         if data:
+             return old_fn(self, data, *args, **kwargs)
+         else:
+             return old_fn(self, *args, **kwargs)
+
+     return wrapped
+
+
+ def extend_writer(x):
+     """
+     Extends the writer with the "old", batch inserter API. This is purely
+     a backwards compatibility layer, and we want to avoid having to maintain that
+     in C++ with few benefits.
+     """
+
+     x.start_row = _legacy_start_row
+     x.set_double = _legacy_set_double
+     x.set_int64 = _legacy_set_int64
+     x.set_string = _legacy_set_string
+     x.set_blob = _legacy_set_blob
+     x.set_timestamp = _legacy_set_timestamp
+
+     x.push = _wrap_fn(x.push, _legacy_push)
+     x.push_fast = _wrap_fn(x.push_fast, _legacy_push)
+     x.push_async = _wrap_fn(x.push_async, _legacy_push)
+     x.push_truncate = _wrap_fn(x.push_truncate, _legacy_push)
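
The heart of `_legacy_push` above is the pivot from buffered row dicts into one masked NumPy array per column, with `None` cells becoming masked entries. The following standalone sketch illustrates that pivot for a single hypothetical double column at index 0; the sample rows and timestamps are invented for illustration and are not part of the package.

import numpy as np
import numpy.ma as ma

# Hypothetical buffered rows, shaped the way _legacy_start_row / _legacy_set_* build them:
# one dict per row, cell values keyed by column index, missing cells simply absent.
pending = [
    {'$timestamp': np.datetime64('2024-01-01T00:00:00', 'ns'), 'by_index': {0: 1.5}},
    {'$timestamp': np.datetime64('2024-01-01T00:00:01', 'ns'), 'by_index': {}},
    {'$timestamp': np.datetime64('2024-01-01T00:00:02', 'ns'), 'by_index': {0: 2.5}},
]

# Pivot: gather column 0 across all rows, padding missing cells with None.
xs = [row['by_index'].get(0, None) for row in pending]
mask = [x is None for x in xs]

# A column that was never set becomes a fully masked array; otherwise only the gaps are masked.
if all(mask):
    column = ma.masked_all(len(xs), dtype=np.dtype('float64'))
else:
    column = ma.masked_array(data=np.array(xs, dtype=np.dtype('float64')), mask=mask)

# The $timestamp values become the index that _legacy_push passes to WriterData.append().
index = np.array([row['$timestamp'] for row in pending], dtype='datetime64[ns]')
print(index)
print(column)  # the second entry is masked because its row never set column 0
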
quasardb/firehose.py ADDED
@@ -0,0 +1,101 @@
+ import time
+ import quasardb
+ import logging
+ import numpy as np
+
+ FIREHOSE_TABLE = "$qdb.firehose"
+ POLL_INTERVAL = 0.1
+
+ logger = logging.getLogger('quasardb.firehose')
+
+
+ def _init():
+     """
+     Initialize our internal state.
+     """
+     return {'last': None,
+             'seen': set()}
+
+
+ def _get_transactions_since(conn, table_name, last):
+     """
+     Retrieve all transactions since a certain timestamp. `last` is expected to be a
+     firehose row dict with at least a $timestamp attached.
+     """
+     if last is None:
+         q = "SELECT $timestamp, transaction_id, begin, end FROM \"{}\" WHERE table = '{}' ORDER BY $timestamp".format(
+             FIREHOSE_TABLE, table_name)
+     else:
+         q = "SELECT $timestamp, transaction_id, begin, end FROM \"{}\" IN RANGE ({}, +1y) WHERE table = '{}' ORDER BY $timestamp".format(
+             FIREHOSE_TABLE, last['$timestamp'], table_name)
+
+     return conn.query(q)
+
+
+ def _get_transaction_data(conn, table_name, begin, end):
+     """
+     Gets all data from a certain table in the given range.
+     """
+     q = "SELECT * FROM \"{}\" IN RANGE ({}, {}) ".format(
+         table_name, begin, end)
+     return conn.query(q)
+
+
+ def _get_next(conn, table_name, state):
+
+     # Our flow to retrieve new data is as follows:
+     # 1. Based on the state's last processed transaction, retrieve all transactions
+     #    that are logged into the firehose since then.
+     # 2. For each of the transactions, verify we haven't seen it before.
+     # 3. For each of the transactions, pull in all data.
+     # 4. Concatenate all this data (in order of quasardb transaction).
+
+     txs = _get_transactions_since(conn, table_name, state['last'])
+
+     xs = list()
+     for tx in txs:
+         txid = tx['transaction_id']
+
+         if state['last'] is not None and tx['$timestamp'] > state['last']['$timestamp']:
+             # At this point we are guaranteed that the transaction we encounter is
+             # 'new' and will not conflict with any other transaction ids. It is thus
+             # safe to reset the txid set.
+             state['seen'] = set()
+
+         if txid not in state['seen']:
+             xs = xs + _get_transaction_data(conn,
+                                             table_name,
+                                             tx['begin'],
+                                             # The firehose logs the transaction `end` span as
+                                             # end inclusive, while our bulk reader and/or query
+                                             # language are end exclusive.
+                                             tx['end'] + np.timedelta64(1, 'ns'))
+
+             # Because it is possible that multiple firehose changes are stored with the
+             # exact same $timestamp, we also keep track of the actually seen
+             # transaction ids.
+             state['seen'].add(txid)
+
+         state['last'] = tx
+
+     return (state, xs)
+
+
+ def subscribe(conn, table_name):
+     state = _init()
+
+     while True:
+         # Note how this is effectively a never-ending fold loop
+         # that transforms state into a new state. This state effectively
+         # functions as a checkpoint.
+         #
+         # At a later point, we could choose to provide the user
+         # direct access to this 'state' object, so that they can
+         # implement e.g. mechanisms to replay from a certain checkpoint.
+         (state, xs) = _get_next(conn, table_name, state)
+
+         for x in xs:
+             yield x
+
+         # Our poll interval
+         time.sleep(POLL_INTERVAL)
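
For context, a minimal consumption sketch of the generator above; the cluster URI and table name are placeholders, and the shape of each yielded row is whatever conn.query() returns for the changed range.

import quasardb
from quasardb.firehose import subscribe

# Hypothetical URI and table name. subscribe() never returns on its own: it keeps
# polling the firehose every POLL_INTERVAL seconds and yields rows as new
# transactions for the table are logged.
with quasardb.Cluster("qdb://127.0.0.1:2836") as conn:
    for row in subscribe(conn, "measurements"):
        # Each row is one query result from the transaction's time range
        # (e.g. a dict of column values); the exact shape depends on conn.query().
        print(row)
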