chdb 3.7.1__cp38-abi3-musllinux_1_2_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chdb might be problematic. See the release notes for more details.
- chdb/__init__.py +255 -0
- chdb/__main__.py +38 -0
- chdb/_chdb.abi3.so +0 -0
- chdb/dataframe/__init__.py +24 -0
- chdb/dataframe/query.py +544 -0
- chdb/dbapi/__init__.py +134 -0
- chdb/dbapi/connections.py +257 -0
- chdb/dbapi/constants/FIELD_TYPE.py +31 -0
- chdb/dbapi/constants/__init__.py +0 -0
- chdb/dbapi/converters.py +611 -0
- chdb/dbapi/cursors.py +545 -0
- chdb/dbapi/err.py +300 -0
- chdb/dbapi/times.py +191 -0
- chdb/libpybind11nonlimitedapi_chdb_3.10.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.11.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.12.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.13.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.14.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.8.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.9.so +0 -0
- chdb/rwabc.py +65 -0
- chdb/session/__init__.py +3 -0
- chdb/session/state.py +287 -0
- chdb/state/__init__.py +3 -0
- chdb/state/sqlitelike.py +1101 -0
- chdb/udf/__init__.py +10 -0
- chdb/udf/udf.py +122 -0
- chdb/utils/__init__.py +15 -0
- chdb/utils/trace.py +105 -0
- chdb/utils/types.py +232 -0
- chdb-3.7.1.dist-info/LICENSE.txt +203 -0
- chdb-3.7.1.dist-info/METADATA +566 -0
- chdb-3.7.1.dist-info/RECORD +35 -0
- chdb-3.7.1.dist-info/WHEEL +5 -0
- chdb-3.7.1.dist-info/top_level.txt +2 -0
chdb/__init__.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
import threading
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ChdbError(Exception):
    """Base exception for all chDB errors.

    Raised whenever query execution fails in the embedded ClickHouse
    engine: syntax errors, type mismatches, missing tables or columns,
    and any other engine-reported failure. The exception message carries
    the detailed error text produced by ClickHouse.

    Attributes:
        args: Tuple holding the error message and any extra arguments.

    Examples:
        >>> try:
        ...     result = chdb.query("SELECT * FROM non_existent_table")
        ... except chdb.ChdbError as e:
        ...     print(f"Query failed: {e}")
        Query failed: Table 'non_existent_table' doesn't exist

        >>> try:
        ...     result = chdb.query("SELECT invalid_syntax FROM")
        ... except chdb.ChdbError as e:
        ...     print(f"Syntax error: {e}")
        Syntax error: Syntax error near 'FROM'

    Note:
        chdb.query() and related functions raise this automatically when
        the engine reports an error; catch it around any query that may
        fail to give your application proper error handling.
    """
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Output formats that are post-processed into in-memory Arrow-backed objects.
_arrow_format = set({"dataframe", "arrowtable"})
# Maps a lowercased output format to the converter applied to the raw result;
# formats not listed here are returned as-is.
_process_result_format_funs = {
    "dataframe": lambda x: to_df(x),
    "arrowtable": lambda x: to_arrowTable(x),
}

# If any UDF is defined, the path of the UDF will be set to this variable
# and the path will be deleted when the process exits
# UDF config path will be f"{g_udf_path}/udf_config.xml"
# UDF script path will be f"{g_udf_path}/{func_name}.py"
g_udf_path = ""

# Version of this chdb Python package (the engine version is discovered below).
__version__ = "3.7.1"
if sys.version_info[:2] >= (3, 7):
    # get the path of the current file
    current_path = os.path.dirname(os.path.abspath(__file__))
    # change the current working directory to the path of the current file
    # and import _chdb then change the working directory back
    # (presumably so the extension module can locate its bundled shared
    # libraries during load — TODO confirm)
    cwd = os.getcwd()
    os.chdir(current_path)
    from . import _chdb  # noqa

    os.chdir(cwd)
    # Query the embedded engine once at import time to record its version.
    conn = _chdb.connect()
    # str(bytes) yields e.g. "b'\"24.5.1\"\\n'"; the [3:-4] slice strips the
    # b'" prefix and the trailing "\n' so only the bare version text remains.
    engine_version = str(conn.query("SELECT version();", "CSV").bytes())[3:-4]
    conn.close()
else:
    raise NotImplementedError("Python 3.6 or lower version is not supported")

# NOTE(review): components stay strings, e.g. ('3', '7', '1'), so any
# comparison is lexicographic rather than numeric — confirm callers expect this.
chdb_version = tuple(__version__.split('.'))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# return pyarrow table
|
|
74
|
+
# return pyarrow table
def to_arrowTable(res):
    """Convert a chDB query result into a PyArrow Table.

    The result object carries binary Arrow IPC data; an empty result
    produces an empty, schema-less table.

    Args:
        res: chDB query result object containing binary Arrow data

    Returns:
        pa.Table: PyArrow Table containing the query results

    Raises:
        ImportError: If pyarrow or pandas are not installed

    Example:
        >>> result = chdb.query("SELECT 1 as id, 'hello' as msg", "Arrow")
        >>> table = chdb.to_arrowTable(result)
        >>> print(table.to_pandas())
           id    msg
        0   1  hello
    """
    # try import pyarrow and pandas, if failed, raise ImportError with suggestion
    try:
        import pyarrow as pa  # noqa
        import pandas as pd  # noqa
    except ImportError as e:
        print(f"ImportError: {e}")
        print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
        raise ImportError("Failed to import pyarrow or pandas") from None

    # Nothing to parse for an empty result — hand back an empty table.
    if not len(res):
        return pa.Table.from_batches([], schema=pa.schema([]))

    # Read the Arrow IPC file straight from the result's memory view.
    view = res.get_memview()
    reader = pa.RecordBatchFileReader(view.view())
    return reader.read_all()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# return pandas dataframe
|
|
112
|
+
# return pandas dataframe
def to_df(r):
    """Convert a chDB query result into a pandas DataFrame.

    The result is first materialized as a PyArrow Table, then converted
    to pandas with multi-threaded conversion for better performance.

    Args:
        r: chDB query result object containing binary Arrow data

    Returns:
        pd.DataFrame: pandas DataFrame containing the query results

    Raises:
        ImportError: If pyarrow or pandas are not installed

    Example:
        >>> result = chdb.query("SELECT 1 as id, 'hello' as msg", "Arrow")
        >>> df = chdb.to_df(result)
        >>> print(df)
           id    msg
        0   1  hello
    """
    return to_arrowTable(r).to_pandas(use_threads=True)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# global connection lock, for multi-threading use of legacy chdb.query()
# (each query() call opens and closes its own connection; the lock
# serializes those calls across threads)
g_conn_lock = threading.Lock()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# wrap _chdb functions
|
|
143
|
+
def _with_conn_param(conn_str, param):
    # Append *param* to *conn_str*, choosing '?' or '&' depending on whether
    # the connection string already carries query parameters.
    sep = "&" if "?" in conn_str else "?"
    return f"{conn_str}{sep}{param}"


# wrap _chdb functions
def query(sql, output_format="CSV", path="", udf_path=""):
    """Execute SQL query using chDB engine.

    This is the main query function that executes SQL statements using the
    embedded ClickHouse engine. Supports various output formats and can work
    with in-memory or file-based databases.

    Args:
        sql (str): SQL query string to execute
        output_format (str, optional): Output format for results. Defaults to "CSV".
            Supported formats include:

            - "CSV" - Comma-separated values
            - "JSON" - JSON format
            - "Arrow" - Apache Arrow format
            - "Parquet" - Parquet format
            - "DataFrame" - Pandas DataFrame
            - "ArrowTable" - PyArrow Table
            - "Debug" - Enable verbose logging

        path (str, optional): Database file path. Defaults to "" (in-memory database).
            Can be a file path or ":memory:" for in-memory database.
        udf_path (str, optional): Path to User-Defined Functions directory. Defaults to "".

    Returns:
        Query result in the specified format:

        - str: For text formats like CSV, JSON
        - pd.DataFrame: When output_format is "DataFrame" or "dataframe"
        - pa.Table: When output_format is "ArrowTable" or "arrowtable"
        - chdb result object: For other formats

    Raises:
        ChdbError: If the SQL query execution fails
        ImportError: If required dependencies are missing for DataFrame/Arrow formats

    Examples:
        >>> # Basic CSV query
        >>> result = chdb.query("SELECT 1, 'hello'")
        >>> print(result)
        "1,hello"

        >>> # Query with DataFrame output
        >>> df = chdb.query("SELECT 1 as id, 'hello' as msg", "DataFrame")
        >>> print(df)
           id    msg
        0   1  hello

        >>> # Query with file-based database
        >>> result = chdb.query("CREATE TABLE test (id INT)", path="mydb.chdb")

        >>> # Query with UDF
        >>> result = chdb.query("SELECT my_udf('test')", udf_path="/path/to/udfs")
    """
    global g_udf_path
    if udf_path != "":
        g_udf_path = udf_path
    # Empty path means an in-memory database.
    conn_str = ":memory:" if path == "" else f"{path}"
    if g_udf_path != "":
        conn_str = _with_conn_param(conn_str, f"udf_path={g_udf_path}")
    if output_format == "Debug":
        # "Debug" is not a real ClickHouse format: fall back to CSV and turn
        # on verbose engine logging via connection parameters instead.
        output_format = "CSV"
        conn_str = _with_conn_param(conn_str, "verbose&log-level=test")

    lower_output_format = output_format.lower()
    # Identity for text formats; to_df/to_arrowTable for Arrow-backed ones.
    result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
    if lower_output_format in _arrow_format:
        output_format = "Arrow"

    with g_conn_lock:
        conn = _chdb.connect(conn_str)
        try:
            res = conn.query(sql, output_format)
            if res.has_error():
                raise ChdbError(res.error_message())
        finally:
            # Always release the connection, even when conn.query() itself
            # raises (the previous code leaked the connection in that case).
            conn.close()
    return result_func(res)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# alias for query
sql = query

# Reader interface exposed from the native extension for streaming Python
# data into chDB queries.
PyReader = _chdb.PyReader

# Submodule imports are deliberately placed at the bottom so they can use the
# names defined above (query, ChdbError, ...) without circular-import issues.
from . import dbapi, session, udf, utils  # noqa: E402
from .state import connect  # noqa: E402

# Public API of the chdb package.
__all__ = [
    "_chdb",
    "PyReader",
    "ChdbError",
    "query",
    "sql",
    "chdb_version",
    "engine_version",
    "to_df",
    "to_arrowTable",
    "dbapi",
    "session",
    "udf",
    "utils",
    "connect",
]
|
chdb/__main__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from .__init__ import query
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def main():
    """Entry point for ``python -m chdb``.

    Parses one SQL statement and an optional output format (default CSV)
    from the command line, runs it through chdb.query(), and writes the
    result to stdout.
    """
    prog = 'python -m chdb'
    custom_usage = "%(prog)s [-h] \"SELECT 1\" [format]"
    description = ('''A simple command line interface for chdb
                   to run SQL and output in specified format''')
    parser = argparse.ArgumentParser(prog=prog,
                                     usage=custom_usage,
                                     description=description)
    parser.add_argument('sql', nargs=1,
                        type=str,
                        help='sql, e.g: select 1112222222,555')
    parser.add_argument('format', nargs='?',
                        type=str,
                        help='''sql result output format,
                        e.g: CSV, Dataframe, JSON etc,
                        more format checkout on
                        https://clickhouse.com/docs/en/interfaces/formats''',
                        default="CSV")
    options = parser.parse_args()
    sql = options.sql[0]
    output_format = options.format
    res = query(sql, output_format)
    try:
        # DataFrame/ArrowTable results are already Python objects; every
        # other format is a result object whose text is read via .data().
        if output_format.lower() in ("dataframe", "arrowtable"):
            temp = res
        else:
            temp = res.data()
        print(temp, end="")
    except UnicodeDecodeError:
        # Non-text output (e.g. binary formats): fall back to the raw bytes.
        print(repr(res.bytes()))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Run the CLI only when executed as a script/module, not on import.
if __name__ == '__main__':
    main()
|
chdb/_chdb.abi3.so
ADDED
|
Binary file
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# try import pyarrow and pandas, if failed, raise ImportError with suggestion
try:
    import pyarrow as pa  # noqa
    import pandas as pd  # noqa
except ImportError as e:
    print(f'ImportError: {e}')
    print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
    raise ImportError('Failed to import pyarrow or pandas') from None

# check if pandas version >= 2.0.0
# (older pandas still works — the message below is only a performance advisory)
try:
    version_parts = pd.__version__.split('.')
    major_version = int(version_parts[0])
    if major_version < 2:
        print('Please upgrade pandas to version 2.0.0 or higher to have better performance')
except (ValueError, IndexError, AttributeError):
    # Unparseable version string — skip the advisory rather than break import.
    pass

# Imported after the dependency check so a missing pyarrow/pandas is reported
# with the friendly message above instead of a deep import failure.
from .query import Table, pandas_read_parquet  # noqa: C0413

# Module-level aliases: query() and sql() both run a static query on Table.
query = Table.queryStatic
sql = Table.queryStatic

__all__ = ["Table", "query", "sql", "pandas_read_parquet"]
|