quasardb 3.14.2.dev8__cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quasardb/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
- quasardb/CMakeFiles/progress.marks +1 -0
- quasardb/Makefile +189 -0
- quasardb/__init__.py +140 -0
- quasardb/__init__.pyi +72 -0
- quasardb/cmake_install.cmake +58 -0
- quasardb/date/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
- quasardb/date/CMakeFiles/Export/b76006b2b7125baf1b0b4d4ca4db82bd/dateTargets.cmake +108 -0
- quasardb/date/CMakeFiles/progress.marks +1 -0
- quasardb/date/Makefile +189 -0
- quasardb/date/cmake_install.cmake +81 -0
- quasardb/date/dateConfigVersion.cmake +65 -0
- quasardb/date/dateTargets.cmake +63 -0
- quasardb/extensions/__init__.py +9 -0
- quasardb/extensions/writer.py +195 -0
- quasardb/firehose.py +112 -0
- quasardb/libqdb_api.so +0 -0
- quasardb/numpy/__init__.py +1106 -0
- quasardb/pandas/__init__.py +696 -0
- quasardb/pool.py +338 -0
- quasardb/pybind11/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
- quasardb/pybind11/CMakeFiles/progress.marks +1 -0
- quasardb/pybind11/Makefile +189 -0
- quasardb/pybind11/cmake_install.cmake +50 -0
- quasardb/quasardb/__init__.pyi +97 -0
- quasardb/quasardb/_batch_column.pyi +5 -0
- quasardb/quasardb/_batch_inserter.pyi +32 -0
- quasardb/quasardb/_blob.pyi +16 -0
- quasardb/quasardb/_cluster.pyi +106 -0
- quasardb/quasardb/_continuous.pyi +18 -0
- quasardb/quasardb/_double.pyi +7 -0
- quasardb/quasardb/_entry.pyi +61 -0
- quasardb/quasardb/_error.pyi +15 -0
- quasardb/quasardb/_integer.pyi +7 -0
- quasardb/quasardb/_node.pyi +26 -0
- quasardb/quasardb/_options.pyi +106 -0
- quasardb/quasardb/_perf.pyi +7 -0
- quasardb/quasardb/_properties.pyi +5 -0
- quasardb/quasardb/_query.pyi +2 -0
- quasardb/quasardb/_reader.pyi +15 -0
- quasardb/quasardb/_retry.pyi +16 -0
- quasardb/quasardb/_string.pyi +12 -0
- quasardb/quasardb/_table.pyi +140 -0
- quasardb/quasardb/_tag.pyi +5 -0
- quasardb/quasardb/_timestamp.pyi +9 -0
- quasardb/quasardb/_writer.pyi +112 -0
- quasardb/quasardb/metrics/__init__.pyi +28 -0
- quasardb/quasardb.cpython-310-x86_64-linux-gnu.so +0 -0
- quasardb/range-v3/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
- quasardb/range-v3/CMakeFiles/Export/48a02d54b5e9e60c30c5f249b431a911/range-v3-targets.cmake +128 -0
- quasardb/range-v3/CMakeFiles/progress.marks +1 -0
- quasardb/range-v3/CMakeFiles/range.v3.headers.dir/DependInfo.cmake +22 -0
- quasardb/range-v3/CMakeFiles/range.v3.headers.dir/build.make +86 -0
- quasardb/range-v3/CMakeFiles/range.v3.headers.dir/cmake_clean.cmake +5 -0
- quasardb/range-v3/CMakeFiles/range.v3.headers.dir/compiler_depend.make +2 -0
- quasardb/range-v3/CMakeFiles/range.v3.headers.dir/compiler_depend.ts +2 -0
- quasardb/range-v3/CMakeFiles/range.v3.headers.dir/progress.make +1 -0
- quasardb/range-v3/Makefile +204 -0
- quasardb/range-v3/cmake_install.cmake +93 -0
- quasardb/range-v3/include/range/v3/version.hpp +24 -0
- quasardb/range-v3/range-v3-config-version.cmake +83 -0
- quasardb/range-v3/range-v3-config.cmake +80 -0
- quasardb/stats.py +376 -0
- quasardb/table_cache.py +60 -0
- quasardb/typing.py +23 -0
- quasardb-3.14.2.dev8.dist-info/METADATA +41 -0
- quasardb-3.14.2.dev8.dist-info/RECORD +70 -0
- quasardb-3.14.2.dev8.dist-info/WHEEL +6 -0
- quasardb-3.14.2.dev8.dist-info/licenses/LICENSE.md +11 -0
- quasardb-3.14.2.dev8.dist-info/top_level.txt +1 -0
quasardb/firehose.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from quasardb import Cluster
|
|
8
|
+
|
|
9
|
+
# Name of the table that `_get_transactions_since` queries for logged transactions.
FIREHOSE_TABLE = "$qdb.firehose"
|
|
10
|
+
# Value handed to `time.sleep` between two polls in `subscribe` (seconds).
POLL_INTERVAL = 0.1
|
|
11
|
+
|
|
12
|
+
# Module-level logger; none of the functions visible in this file reference it.
logger = logging.getLogger("quasardb.firehose")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _init() -> Dict[str, Any]:
|
|
16
|
+
"""
|
|
17
|
+
Initialize our internal state.
|
|
18
|
+
"""
|
|
19
|
+
return {"last": None, "seen": set()}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _get_transactions_since(
    conn: Cluster, table_name: str, last: Optional[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """
    Query the firehose table for the transactions logged for `table_name`.

    When `last` is None the query carries no time range. Otherwise `last`
    must be a dict with a "$timestamp" entry, and the query is restricted to
    ``IN RANGE (<that timestamp>, +1y)``.

    In both cases the rows are requested ordered by $timestamp, and the
    result of `conn.query` is returned unchanged.
    """
    if last is not None:
        # Resume from the timestamp of the checkpoint row.
        ranged_query = "SELECT $timestamp, transaction_id, begin, end FROM \"{}\" IN RANGE ({}, +1y) WHERE table = '{}' ORDER BY $timestamp".format(
            FIREHOSE_TABLE, last["$timestamp"], table_name
        )
        return conn.query(ranged_query)

    # No checkpoint yet: no range restriction at all.
    full_query = "SELECT $timestamp, transaction_id, begin, end FROM \"{}\" WHERE table = '{}' ORDER BY $timestamp".format(
        FIREHOSE_TABLE, table_name
    )
    return conn.query(full_query)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_transaction_data(
    conn: Cluster, table_name: str, begin: str, end: str
) -> List[Dict[str, Any]]:
    """
    Select all columns of `table_name` restricted to ``IN RANGE (begin, end)``.

    `begin` and `end` are interpolated into the query text with `str.format`,
    and the result of `conn.query` is returned unchanged.

    NOTE(review): the annotations say `str`, but the caller in this module
    passes `tx["end"] + np.timedelta64(1, "ns")` for `end`, so the values are
    presumably numpy datetimes rather than strings -- confirm.
    """
    template = 'SELECT * FROM "{}" IN RANGE ({}, {}) '
    query = template.format(table_name, begin, end)
    return conn.query(query)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _get_next(
    conn: Cluster, table_name: str, state: Dict[str, Any]
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """
    Run one poll: fetch the transactions logged since the checkpoint and
    collect the table data belonging to the ones not processed yet.

    `state` is a dict with a "last" entry (the most recently processed
    firehose row, or None) and a "seen" entry (a set of transaction ids).
    It is updated in place and also returned.

    Returns a tuple ``(state, rows)`` where `rows` is the concatenation, in
    the order the transactions were returned by the firehose query, of the
    data fetched for every transaction that was not in ``state["seen"]``.
    """
    # Our flow to retrieve new data is as follows:
    # 1. Based on the state's last processed transaction, retrieve all transactions
    #    that are logged into the firehose since then.
    # 2. For each of the transactions, verify we haven't seen it before
    # 3. For each of the transactions, pull in all data
    # 4. Concatenate all this data (in order of quasardb transaction)

    txs = _get_transactions_since(conn, table_name, state["last"])

    xs: List[Dict[str, Any]] = []
    for tx in txs:
        txid = tx["transaction_id"]

        if state["last"] is not None and tx["$timestamp"] > state["last"]["$timestamp"]:
            # At this point we are guaranteed that the transaction we encounter is
            # 'new', will not conflict with any other transaction ids. It is thus
            # safe to reset the txid set.
            state["seen"] = set()

        if txid not in state["seen"]:
            # Extend in place: `xs = xs + ...` would copy every row collected
            # so far for each transaction, which is quadratic in the total
            # number of rows.
            xs.extend(
                _get_transaction_data(
                    conn,
                    table_name,
                    tx["begin"],
                    # The firehose logs transaction `end` span as
                    # end inclusive, while our bulk reader and/or query
                    # language are end exclusive.
                    tx["end"] + np.timedelta64(1, "ns"),
                )
            )

            # Because it is possible that multiple firehose changes are stored with the
            # exact same $timestamp, we also keep track of the actually seen
            # transaction ids.
            state["seen"].add(txid)

        # Every row, seen or not, becomes the new checkpoint.
        state["last"] = tx

    return (state, xs)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def subscribe(conn: Cluster, table_name: str) -> Iterator[Dict[str, Any]]:
    """
    Yield rows of `table_name` as new transactions show up in the firehose.

    This generator never terminates on its own: it repeatedly calls
    `_get_next`, yields every row that call produced, then sleeps for
    `POLL_INTERVAL` before polling again.
    """
    checkpoint = _init()

    while True:
        # Each iteration folds the previous checkpoint into a new one, so
        # the checkpoint always reflects what has been handed out so far.
        #
        # Exposing this checkpoint to the caller at some later point would
        # allow e.g. replaying from a given position.
        checkpoint, rows = _get_next(conn, table_name, checkpoint)

        yield from rows

        # Wait out the poll interval before asking again.
        time.sleep(POLL_INTERVAL)
|
quasardb/libqdb_api.so
ADDED
|
Binary file
|