chdb 3.6.0__cp38-abi3-macosx_11_0_arm64.whl → 3.7.0__cp38-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/.flake8 ADDED
@@ -0,0 +1,5 @@
1
+ [flake8]
2
+ max-line-length = 130
3
+ extend-ignore = E722
4
+ exclude =
5
+ build/
chdb/__init__.py CHANGED
@@ -4,7 +4,38 @@ import threading
4
4
 
5
5
 
6
6
  class ChdbError(Exception):
7
- """Base class for exceptions in this module."""
7
+ """Base exception class for chDB-related errors.
8
+
9
+ This exception is raised when chDB query execution fails or encounters
10
+ an error. It inherits from the standard Python Exception class and
11
+ provides error information from the underlying ClickHouse engine.
12
+
13
+ The exception message typically contains detailed error information
14
+ from ClickHouse, including syntax errors, type mismatches, missing
15
+ tables/columns, and other query execution issues.
16
+
17
+ Attributes:
18
+ args: Tuple containing the error message and any additional arguments
19
+
20
+ Examples:
21
+ >>> try:
22
+ ... result = chdb.query("SELECT * FROM non_existent_table")
23
+ ... except chdb.ChdbError as e:
24
+ ... print(f"Query failed: {e}")
25
+ Query failed: Table 'non_existent_table' doesn't exist
26
+
27
+ >>> try:
28
+ ... result = chdb.query("SELECT invalid_syntax FROM")
29
+ ... except chdb.ChdbError as e:
30
+ ... print(f"Syntax error: {e}")
31
+ Syntax error: Syntax error near 'FROM'
32
+
33
+ Note:
34
+ This exception is automatically raised by chdb.query() and related
35
+ functions when the underlying ClickHouse engine reports an error.
36
+ You should catch this exception when handling potentially failing
37
+ queries to provide appropriate error handling in your application.
38
+ """
8
39
 
9
40
 
10
41
  _arrow_format = set({"dataframe", "arrowtable"})
@@ -19,7 +50,7 @@ _process_result_format_funs = {
19
50
  # UDF script path will be f"{g_udf_path}/{func_name}.py"
20
51
  g_udf_path = ""
21
52
 
22
- chdb_version = ('3', '6', '0')
53
+ __version__ = "3.7.0"
23
54
  if sys.version_info[:2] >= (3, 7):
24
55
  # get the path of the current file
25
56
  current_path = os.path.dirname(os.path.abspath(__file__))
@@ -36,17 +67,32 @@ if sys.version_info[:2] >= (3, 7):
36
67
  else:
37
68
  raise NotImplementedError("Python 3.6 or lower version is not supported")
38
69
 
39
- try:
40
- # Change here if project is renamed and does not equal the package name
41
- dist_name = __name__
42
- __version__ = ".".join(map(str, chdb_version))
43
- except: # noqa
44
- __version__ = "unknown"
70
+ chdb_version = tuple(__version__.split('.'))
45
71
 
46
72
 
47
73
  # return pyarrow table
48
74
  def to_arrowTable(res):
49
- """convert res to arrow table"""
75
+ """Convert query result to PyArrow Table.
76
+
77
+ Converts a chDB query result to a PyArrow Table for efficient columnar data processing.
78
+ Returns an empty table if the result is empty.
79
+
80
+ Args:
81
+ res: chDB query result object containing binary Arrow data
82
+
83
+ Returns:
84
+ pa.Table: PyArrow Table containing the query results
85
+
86
+ Raises:
87
+ ImportError: If pyarrow or pandas are not installed
88
+
89
+ Example:
90
+ >>> result = chdb.query("SELECT 1 as id, 'hello' as msg", "Arrow")
91
+ >>> table = chdb.to_arrowTable(result)
92
+ >>> print(table.to_pandas())
93
+ id msg
94
+ 0 1 hello
95
+ """
50
96
  # try to import pyarrow and pandas; if that fails, raise ImportError with a suggestion
51
97
  try:
52
98
  import pyarrow as pa # noqa
@@ -57,12 +103,34 @@ def to_arrowTable(res):
57
103
  raise ImportError("Failed to import pyarrow or pandas") from None
58
104
  if len(res) == 0:
59
105
  return pa.Table.from_batches([], schema=pa.schema([]))
60
- return pa.RecordBatchFileReader(res.bytes()).read_all()
106
+
107
+ memview = res.get_memview()
108
+ return pa.RecordBatchFileReader(memview.view()).read_all()
61
109
 
62
110
 
63
111
  # return pandas dataframe
64
112
  def to_df(r):
65
- """convert arrow table to Dataframe"""
113
+ """Convert query result to pandas DataFrame.
114
+
115
+ Converts a chDB query result to a pandas DataFrame by first converting to
116
+ PyArrow Table and then to pandas using multi-threading for better performance.
117
+
118
+ Args:
119
+ r: chDB query result object containing binary Arrow data
120
+
121
+ Returns:
122
+ pd.DataFrame: pandas DataFrame containing the query results
123
+
124
+ Raises:
125
+ ImportError: If pyarrow or pandas are not installed
126
+
127
+ Example:
128
+ >>> result = chdb.query("SELECT 1 as id, 'hello' as msg", "Arrow")
129
+ >>> df = chdb.to_df(result)
130
+ >>> print(df)
131
+ id msg
132
+ 0 1 hello
133
+ """
66
134
  t = to_arrowTable(r)
67
135
  return t.to_pandas(use_threads=True)
68
136
 
@@ -73,6 +141,59 @@ g_conn_lock = threading.Lock()
73
141
 
74
142
  # wrap _chdb functions
75
143
  def query(sql, output_format="CSV", path="", udf_path=""):
144
+ """Execute SQL query using chDB engine.
145
+
146
+ This is the main query function that executes SQL statements using the embedded
147
+ ClickHouse engine. Supports various output formats and can work with in-memory
148
+ or file-based databases.
149
+
150
+ Args:
151
+ sql (str): SQL query string to execute
152
+ output_format (str, optional): Output format for results. Defaults to "CSV".
153
+ Supported formats include:
154
+
155
+ - "CSV" - Comma-separated values
156
+ - "JSON" - JSON format
157
+ - "Arrow" - Apache Arrow format
158
+ - "Parquet" - Parquet format
159
+ - "DataFrame" - Pandas DataFrame
160
+ - "ArrowTable" - PyArrow Table
161
+ - "Debug" - Enable verbose logging
162
+
163
+ path (str, optional): Database file path. Defaults to "" (in-memory database).
164
+ Can be a file path or ":memory:" for in-memory database.
165
+ udf_path (str, optional): Path to User-Defined Functions directory. Defaults to "".
166
+
167
+ Returns:
168
+ Query result in the specified format:
169
+
170
+ - str: For text formats like CSV, JSON
171
+ - pd.DataFrame: When output_format is "DataFrame" or "dataframe"
172
+ - pa.Table: When output_format is "ArrowTable" or "arrowtable"
173
+ - chdb result object: For other formats
174
+
175
+ Raises:
176
+ ChdbError: If the SQL query execution fails
177
+ ImportError: If required dependencies are missing for DataFrame/Arrow formats
178
+
179
+ Examples:
180
+ >>> # Basic CSV query
181
+ >>> result = chdb.query("SELECT 1, 'hello'")
182
+ >>> print(result)
183
+ "1,hello"
184
+
185
+ >>> # Query with DataFrame output
186
+ >>> df = chdb.query("SELECT 1 as id, 'hello' as msg", "DataFrame")
187
+ >>> print(df)
188
+ id msg
189
+ 0 1 hello
190
+
191
+ >>> # Query with file-based database
192
+ >>> result = chdb.query("CREATE TABLE test (id INT)", path="mydb.chdb")
193
+
194
+ >>> # Query with UDF
195
+ >>> result = chdb.query("SELECT my_udf('test')", udf_path="/path/to/udfs")
196
+ """
76
197
  global g_udf_path
77
198
  if udf_path != "":
78
199
  g_udf_path = udf_path
chdb/_chdb.abi3.so CHANGED
Binary file
chdb/build-musl.sh ADDED
@@ -0,0 +1,166 @@
1
+ #!/bin/bash
2
+
3
+ set -e
4
+
5
+ export USE_MUSL=1
6
+
7
+ build_type=${1:-Release}
8
+
9
+ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
10
+
11
+ . ${DIR}/vars.sh
12
+
13
+ BUILD_DIR=${PROJ_DIR}/buildlib
14
+
15
+ HDFS="-DENABLE_HDFS=1 -DENABLE_GSASL_LIBRARY=1 -DENABLE_KRB5=1"
16
+ MYSQL="-DENABLE_MYSQL=1"
17
+ RUST_FEATURES="-DENABLE_RUST=0"
18
+ if [ "$(uname)" == "Linux" ]; then
19
+ GLIBC_COMPATIBILITY="-DGLIBC_COMPATIBILITY=0"
20
+ UNWIND="-DUSE_UNWIND=1"
21
+ JEMALLOC="-DENABLE_JEMALLOC=0"
22
+ PYINIT_ENTRY="-Wl,-ePyInit_${CHDB_PY_MOD}"
23
+ ICU="-DENABLE_ICU=1"
24
+ SED_INPLACE="sed -i"
25
+ # x86_64 only: enable AVX and the embedded compiler
26
+ if [ "$(uname -m)" == "x86_64" ]; then
27
+ CPU_FEATURES="-DENABLE_AVX=1 -DENABLE_AVX2=0"
28
+ LLVM="-DENABLE_EMBEDDED_COMPILER=1 -DENABLE_DWARF_PARSER=1"
29
+ RUST_FEATURES="-DENABLE_RUST=1 -DENABLE_DELTA_KERNEL_RS=1"
30
+ CORROSION_CMAKE_FILE="${PROJ_DIR}/contrib/corrosion-cmake/CMakeLists.txt"
31
+ if [ -f "${CORROSION_CMAKE_FILE}" ]; then
32
+ if ! grep -q 'OPENSSL_NO_DEPRECATED_3_0' "${CORROSION_CMAKE_FILE}"; then
33
+ echo "Modifying corrosion CMakeLists.txt for Linux x86_64..."
34
+ ${SED_INPLACE} 's/corrosion_set_env_vars(${target_name} "RUSTFLAGS=${RUSTFLAGS}")/corrosion_set_env_vars(${target_name} "RUSTFLAGS=${RUSTFLAGS} --cfg osslconf=\\\"OPENSSL_NO_DEPRECATED_3_0\\\"")/g' "${CORROSION_CMAKE_FILE}"
35
+ else
36
+ echo "corrosion CMakeLists.txt already modified, skipping..."
37
+ fi
38
+ else
39
+ echo "Warning: corrosion CMakeLists.txt not found at ${CORROSION_CMAKE_FILE}"
40
+ fi
41
+ else
42
+ CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0 -DNO_ARMV81_OR_HIGHER=1"
43
+ LLVM="-DENABLE_EMBEDDED_COMPILER=0 -DENABLE_DWARF_PARSER=0"
44
+ fi
45
+ else
46
+ echo "OS not supported"
47
+ exit 1
48
+ fi
49
+
50
+ if [ ! -d $BUILD_DIR ]; then
51
+ mkdir $BUILD_DIR
52
+ fi
53
+
54
+ cd ${BUILD_DIR}
55
+ CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_CLICKHOUSE_SERVER=0 -DENABLE_CLICKHOUSE_CLIENT=0 \
56
+ -DENABLE_CLICKHOUSE_KEEPER=0 -DENABLE_CLICKHOUSE_KEEPER_CONVERTER=0 -DENABLE_CLICKHOUSE_LOCAL=1 -DENABLE_CLICKHOUSE_SU=0 -DENABLE_CLICKHOUSE_BENCHMARK=0 \
57
+ -DENABLE_AZURE_BLOB_STORAGE=1 -DENABLE_CLICKHOUSE_COPIER=0 -DENABLE_CLICKHOUSE_DISKS=0 -DENABLE_CLICKHOUSE_FORMAT=0 -DENABLE_CLICKHOUSE_GIT_IMPORT=0 \
58
+ -DENABLE_AWS_S3=1 -DENABLE_HIVE=0 -DENABLE_AVRO=1 \
59
+ -DENABLE_CLICKHOUSE_OBFUSCATOR=0 -DENABLE_CLICKHOUSE_ODBC_BRIDGE=0 -DENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER=0 \
60
+ -DENABLE_KAFKA=1 -DENABLE_LIBPQXX=1 -DENABLE_NATS=0 -DENABLE_AMQPCPP=0 -DENABLE_NURAFT=0 \
61
+ -DENABLE_CASSANDRA=0 -DENABLE_ODBC=0 -DENABLE_NLP=0 \
62
+ -DENABLE_LDAP=0 \
63
+ -DUSE_MUSL=1 \
64
+ -DRust_RUSTUP_INSTALL_MISSING_TARGET=ON \
65
+ ${MYSQL} \
66
+ ${HDFS} \
67
+ -DENABLE_LIBRARIES=0 ${RUST_FEATURES} \
68
+ ${GLIBC_COMPATIBILITY} \
69
+ -DENABLE_UTILS=0 ${LLVM} ${UNWIND} \
70
+ ${ICU} -DENABLE_UTF8PROC=1 ${JEMALLOC} \
71
+ -DENABLE_PARQUET=1 -DENABLE_ROCKSDB=1 -DENABLE_SQLITE=1 -DENABLE_VECTORSCAN=1 \
72
+ -DENABLE_PROTOBUF=1 -DENABLE_THRIFT=1 -DENABLE_MSGPACK=1 \
73
+ -DENABLE_BROTLI=1 -DENABLE_H3=1 -DENABLE_CURL=1 \
74
+ -DENABLE_CLICKHOUSE_ALL=0 -DUSE_STATIC_LIBRARIES=1 -DSPLIT_SHARED_LIBRARIES=0 \
75
+ -DENABLE_SIMDJSON=1 -DENABLE_RAPIDJSON=1 \
76
+ ${CPU_FEATURES} \
77
+ -DENABLE_AVX512=0 -DENABLE_AVX512_VBMI=0 \
78
+ -DENABLE_LIBFIU=1 \
79
+ ${COMPILER_CACHE} \
80
+ -DCHDB_VERSION=${CHDB_VERSION} \
81
+ "
82
+
83
+ BINARY=${BUILD_DIR}/programs/clickhouse
84
+
85
+ # build chdb python module
86
+ py_version="3.8"
87
+ current_py_version=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
88
+ if [ "$current_py_version" != "$py_version" ]; then
89
+ echo "Error: Current Python version is $current_py_version, but required version is $py_version"
90
+ echo "Please switch to Python $py_version using: pyenv shell $py_version"
91
+ exit 1
92
+ fi
93
+ echo "Using Python version: $current_py_version"
94
+ cmake ${CMAKE_ARGS} -DENABLE_PYTHON=1 -DPYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION=${py_version} ..
95
+ ninja -d keeprsp || true
96
+
97
+ # delete the binary and run ninja -v again to capture the command that builds it, then modify that command to generate CHDB_PY_MODULE
98
+ /bin/rm -f ${BINARY}
99
+ cd ${BUILD_DIR}
100
+ ninja -d keeprsp -v > build.log || true
101
+
102
+ USING_RESPONSE_FILE=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log | grep '@CMakeFiles/clickhouse.rsp' || true)
103
+
104
+ if [ ! "${USING_RESPONSE_FILE}" == "" ]; then
105
+ if [ -f CMakeFiles/clickhouse.rsp ]; then
106
+ cp -a CMakeFiles/clickhouse.rsp CMakeFiles/pychdb.rsp
107
+ else
108
+ echo "CMakeFiles/clickhouse.rsp not found"
109
+ exit 1
110
+ fi
111
+ fi
112
+
113
+ # extract the command to generate CHDB_PY_MODULE
114
+ PYCHDB_CMD=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log \
115
+ | sed "s/-o programs\/clickhouse/-fPIC -Wl,-undefined,dynamic_lookup -shared ${PYINIT_ENTRY} -o ${CHDB_PY_MODULE}/" \
116
+ | sed 's/^[^&]*&& //' | sed 's/&&.*//' \
117
+ | sed 's/ -Wl,-undefined,error/ -Wl,-undefined,dynamic_lookup/g' \
118
+ | sed 's/ -Xlinker --no-undefined//g' \
119
+ | sed 's/@CMakeFiles\/clickhouse.rsp/@CMakeFiles\/pychdb.rsp/g' \
120
+ )
121
+
122
+ PYCHDB_CMD=$(echo ${PYCHDB_CMD} | sed 's/ src\/CMakeFiles\/clickhouse_malloc.dir\/Common\/stubFree.c.o//g')
123
+ if [ ! "${USING_RESPONSE_FILE}" == "" ]; then
124
+ ${SED_INPLACE} 's/ src\/CMakeFiles\/clickhouse_malloc.dir\/Common\/stubFree.c.o//g' CMakeFiles/pychdb.rsp
125
+ fi
126
+
127
+ PYCHDB_CMD=$(echo ${PYCHDB_CMD} | sed 's|-Wl,-rpath,/[^[:space:]]*/pybind11-cmake|-Wl,-rpath,\$ORIGIN|g')
128
+
129
+ echo ${PYCHDB_CMD} > pychdb_cmd.sh
130
+
131
+ ${PYCHDB_CMD}
132
+
133
+ ls -lh ${CHDB_PY_MODULE}
134
+
135
+ PYCHDB=${BUILD_DIR}/${CHDB_PY_MODULE}
136
+
137
+ if [ ${build_type} == "Debug" ]; then
138
+ echo -e "\nDebug build, skip strip"
139
+ else
140
+ echo -e "\nStrip the binary:"
141
+ ${STRIP} --remove-section=.comment --remove-section=.note ${PYCHDB}
142
+ fi
143
+ echo -e "\nStripe the binary:"
144
+
145
+ echo -e "\nPYCHDB: ${PYCHDB}"
146
+ ls -lh ${PYCHDB}
147
+ echo -e "\nldd ${PYCHDB}"
148
+ ${LDD} ${PYCHDB} || echo "Binary is statically linked (not a dynamic executable)"
149
+ echo -e "\nfile info of ${PYCHDB}"
150
+ file ${PYCHDB}
151
+
152
+ rm -f ${CHDB_DIR}/*.so
153
+ cp -a ${PYCHDB} ${CHDB_DIR}/${CHDB_PY_MODULE}
154
+
155
+ echo -e "\nSymbols:"
156
+ echo -e "\nPyInit in PYCHDB: ${PYCHDB}"
157
+ ${NM} ${PYCHDB} | grep PyInit || true
158
+ echo -e "\nquery_stable in PYCHDB: ${PYCHDB}"
159
+ ${NM} ${PYCHDB} | grep query_stable || true
160
+
161
+ echo -e "\nAfter copy:"
162
+ cd ${PROJ_DIR} && pwd
163
+
164
+ ccache -s || true
165
+
166
+ CMAKE_ARGS="${CMAKE_ARGS}" bash ${DIR}/build_pybind11.sh --all