chdb 3.7.1__cp38-abi3-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/__init__.py ADDED
@@ -0,0 +1,255 @@
1
+ import sys
2
+ import os
3
+ import threading
4
+
5
+
6
class ChdbError(Exception):
    """Error raised when a chDB query fails.

    A plain subclass of the built-in ``Exception``. The message carries the
    error text reported by the underlying ClickHouse engine, e.g. syntax
    errors, type mismatches, or references to missing tables/columns.

    Attributes:
        args: Tuple holding the error message and any additional arguments.

    Examples:
        >>> try:
        ...     result = chdb.query("SELECT * FROM non_existent_table")
        ... except chdb.ChdbError as e:
        ...     print(f"Query failed: {e}")
        Query failed: Table 'non_existent_table' doesn't exist

        >>> try:
        ...     result = chdb.query("SELECT invalid_syntax FROM")
        ... except chdb.ChdbError as e:
        ...     print(f"Syntax error: {e}")
        Syntax error: Syntax error near 'FROM'

    Note:
        ``chdb.query()`` raises this exception when the query result reports
        an error. Catch it around queries that may fail so the application
        can handle the failure explicitly.
    """
39
+
40
+
41
# Lower-cased output formats that query() requests from the engine as "Arrow"
# and then converts on the Python side.
_arrow_format = {"dataframe", "arrowtable"}

# Post-processing hook per lower-cased output format. The lambdas postpone the
# name lookup until call time, because to_df / to_arrowTable are defined later
# in this module.
_process_result_format_funs = dict(
    dataframe=lambda x: to_df(x),
    arrowtable=lambda x: to_arrowTable(x),
)

# Directory holding user-defined functions; empty string means "none set".
# If any UDF is defined, its path is stored here, and the path will be
# deleted when the process exits.
#   UDF config path: f"{g_udf_path}/udf_config.xml"
#   UDF script path: f"{g_udf_path}/{func_name}.py"
g_udf_path = ""
52
+
53
__version__ = "3.7.1"

# Python 3.6 and older are rejected before the native module is touched.
if sys.version_info[:2] < (3, 7):
    raise NotImplementedError("Python 3.6 or lower version is not supported")

# Import _chdb while the working directory is the package directory, then
# switch back. The try/finally guarantees the caller's working directory is
# restored even if the import itself raises.
current_path = os.path.dirname(os.path.abspath(__file__))
cwd = os.getcwd()
os.chdir(current_path)
try:
    from . import _chdb  # noqa
finally:
    os.chdir(cwd)

# Ask the embedded engine for its version through a short-lived connection;
# the connection is closed even if the query raises.
conn = _chdb.connect()
try:
    # str() is applied to the raw CSV bytes and the slice trims 3 leading and
    # 4 trailing characters of that repr.
    # NOTE(review): presumably the repr looks like b'"<version>"\n' - confirm.
    engine_version = str(conn.query("SELECT version();", "CSV").bytes())[3:-4]
finally:
    conn.close()

# Package version split on dots; the components stay strings.
chdb_version = tuple(__version__.split('.'))
71
+
72
+
73
+ # return pyarrow table
74
def to_arrowTable(res):
    """Turn a chDB query result into a PyArrow Table.

    An empty result (``len(res) == 0``) yields an empty table with an empty
    schema. Otherwise the result's memory view is read with
    ``pa.RecordBatchFileReader``.

    Args:
        res: chDB query result object containing binary Arrow data

    Returns:
        pa.Table: PyArrow Table containing the query results

    Raises:
        ImportError: If pyarrow or pandas are not installed

    Example:
        >>> result = chdb.query("SELECT 1 as id, 'hello' as msg", "Arrow")
        >>> table = chdb.to_arrowTable(result)
        >>> print(table.to_pandas())
           id    msg
        0   1  hello
    """
    # Both packages are imported up front; on failure a hint is printed and a
    # fresh ImportError is raised with the original context suppressed.
    try:
        import pyarrow as pa  # noqa
        import pandas as pd  # noqa
    except ImportError as e:
        print(f"ImportError: {e}")
        print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
        raise ImportError("Failed to import pyarrow or pandas") from None

    if len(res) != 0:
        reader = pa.RecordBatchFileReader(res.get_memview().view())
        return reader.read_all()

    # Nothing to decode: hand back an empty table with an empty schema.
    return pa.Table.from_batches([], schema=pa.schema([]))
109
+
110
+
111
+ # return pandas dataframe
112
def to_df(r):
    """Turn a chDB query result into a pandas DataFrame.

    The result is first converted with ``to_arrowTable`` and the resulting
    table is then converted via ``to_pandas(use_threads=True)``.

    Args:
        r: chDB query result object containing binary Arrow data

    Returns:
        pd.DataFrame: pandas DataFrame containing the query results

    Raises:
        ImportError: If pyarrow or pandas are not installed

    Example:
        >>> result = chdb.query("SELECT 1 as id, 'hello' as msg", "Arrow")
        >>> df = chdb.to_df(result)
        >>> print(df)
           id    msg
        0   1  hello
    """
    return to_arrowTable(r).to_pandas(use_threads=True)
136
+
137
+
138
# Global connection lock: serializes the legacy chdb.query() entry point when
# it is called from multiple threads.
g_conn_lock = threading.Lock()


# wrap _chdb functions
def query(sql, output_format="CSV", path="", udf_path=""):
    """Execute a SQL query with the chDB engine.

    Opens a connection for the duration of the call, runs ``sql`` and closes
    the connection again, all while holding the module-wide connection lock.

    Args:
        sql (str): SQL query string to execute
        output_format (str, optional): Output format for results. Defaults to "CSV".
            Examples of formats:

            - "CSV" - Comma-separated values
            - "JSON" - JSON format
            - "Arrow" - Apache Arrow format
            - "Parquet" - Parquet format
            - "DataFrame" - Pandas DataFrame (matched case-insensitively)
            - "ArrowTable" - PyArrow Table (matched case-insensitively)
            - "Debug" - runs the query as "CSV" and adds the
              ``verbose&log-level=test`` options to the connection string
              (matched case-sensitively)

        path (str, optional): Database path. Defaults to "", which is mapped
            to the ":memory:" connection string.
        udf_path (str, optional): Path to User-Defined Functions directory.
            Defaults to "". A non-empty value is stored in the module-level
            ``g_udf_path`` and therefore also applies to later calls that
            leave this argument empty.

    Returns:
        Query result, depending on the requested format:

        - pd.DataFrame: When output_format is "DataFrame" (any letter case)
        - pa.Table: When output_format is "ArrowTable" (any letter case)
        - chdb result object: For every other format; the object is returned
          unchanged

    Raises:
        ChdbError: If the query result reports an error
        ImportError: If required dependencies are missing for DataFrame/Arrow formats

    Examples:
        >>> # Basic CSV query
        >>> result = chdb.query("SELECT 1, 'hello'")

        >>> # Query with DataFrame output
        >>> df = chdb.query("SELECT 1 as id, 'hello' as msg", "DataFrame")
        >>> print(df)
           id    msg
        0   1  hello

        >>> # Query with file-based database
        >>> result = chdb.query("CREATE TABLE test (id INT)", path="mydb.chdb")

        >>> # Query with UDF
        >>> result = chdb.query("SELECT my_udf('test')", udf_path="/path/to/udfs")
    """
    global g_udf_path
    if udf_path != "":
        g_udf_path = udf_path

    conn_str = ":memory:" if path == "" else f"{path}"

    # Options are appended in order; the first one is introduced with "?"
    # unless the path already carries a query part, later ones with "&".
    options = []
    if g_udf_path != "":
        options.append(f"udf_path={g_udf_path}")
    if output_format == "Debug":
        output_format = "CSV"
        options.append("verbose&log-level=test")
    for option in options:
        separator = "&" if "?" in conn_str else "?"
        conn_str = f"{conn_str}{separator}{option}"

    lower_output_format = output_format.lower()
    result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
    if lower_output_format in _arrow_format:
        # DataFrame / ArrowTable are produced from Arrow data on this side.
        output_format = "Arrow"

    with g_conn_lock:
        conn = _chdb.connect(conn_str)
        try:
            res = conn.query(sql, output_format)
            if res.has_error():
                raise ChdbError(res.error_message())
        finally:
            # Close on every path, including when conn.query() itself raises.
            conn.close()
    return result_func(res)
230
+
231
+
232
# alias for query
sql = query

# Re-export the PyReader attribute of the _chdb module at package level.
PyReader = _chdb.PyReader

# These imports sit below the definitions above instead of at the top of the
# file (hence the E402 suppressions).
# NOTE(review): presumably the submodules rely on names defined earlier in
# this module - confirm before moving them.
from . import dbapi, session, udf, utils  # noqa: E402
from .state import connect  # noqa: E402

# Names exported by `from chdb import *`.
__all__ = [
    "_chdb",
    "PyReader",
    "ChdbError",
    "query",
    "sql",
    "chdb_version",
    "engine_version",
    "to_df",
    "to_arrowTable",
    "dbapi",
    "session",
    "udf",
    "utils",
    "connect",
]
chdb/__main__.py ADDED
@@ -0,0 +1,38 @@
1
+ import argparse
2
+ from .__init__ import query
3
+
4
+
5
def main():
    """Command-line entry point: run one SQL statement and print the result.

    Takes the SQL text as the first positional argument and an optional
    output format (default "CSV") as the second. DataFrame / ArrowTable
    results are printed as returned; for any other format the result's
    ``data()`` is printed, falling back to ``repr(res.bytes())`` when a
    UnicodeDecodeError is raised.
    """
    parser = argparse.ArgumentParser(
        prog='python -m chdb',
        usage="%(prog)s [-h] \"SELECT 1\" [format]",
        description=('''A simple command line interface for chdb
                   to run SQL and output in specified format'''),
    )
    parser.add_argument(
        'sql',
        nargs=1,
        type=str,
        help='sql, e.g: select 1112222222,555',
    )
    parser.add_argument(
        'format',
        nargs='?',
        type=str,
        help='''sql result output format,
                                e.g: CSV, Dataframe, JSON etc,
                                more format checkout on
                                https://clickhouse.com/docs/en/interfaces/formats''',
        default="CSV",
    )
    options = parser.parse_args()

    # nargs=1 yields a one-element list, so unwrap it.
    sql = options.sql[0]
    output_format = options.format
    res = query(sql, output_format)
    try:
        temp = res if output_format.lower() in ("dataframe", "arrowtable") else res.data()
        print(temp, end="")
    except UnicodeDecodeError:
        print(repr(res.bytes()))


if __name__ == '__main__':
    main()
chdb/_chdb.abi3.so ADDED
Binary file
@@ -0,0 +1,24 @@
1
# NOTE(review): the file header for this hunk is missing from the diff view
# (it follows the binary _chdb.abi3.so entry); confirm which module this is.

# pyarrow and pandas are both imported at module load. If either import
# fails, an installation hint is printed and a fresh ImportError is raised
# with the original exception context suppressed (`from None`).
try:
    import pyarrow as pa  # noqa
    import pandas as pd  # noqa
except ImportError as e:
    print(f'ImportError: {e}')
    print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
    raise ImportError('Failed to import pyarrow or pandas') from None

# Check whether the pandas major version is >= 2. An older version only
# triggers a printed recommendation; it is not treated as an error.
try:
    version_parts = pd.__version__.split('.')
    major_version = int(version_parts[0])
    if major_version < 2:
        print('Please upgrade pandas to version 2.0.0 or higher to have better performance')
except (ValueError, IndexError, AttributeError):
    # Best effort: a missing or unparsable version string is ignored.
    pass

# Imported after the dependency checks above, so those checks run first.
from .query import Table, pandas_read_parquet  # noqa: C0413

# Both module-level names refer to the same Table.queryStatic callable.
query = Table.queryStatic
sql = Table.queryStatic

# Names exported by `from <this module> import *`.
__all__ = ["Table", "query", "sql", "pandas_read_parquet"]