chdb 3.3.0__cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/dbapi/err.py ADDED
@@ -0,0 +1,61 @@
1
class StandardError(Exception):
    """Base class for every exception raised by chdb operations."""


class Warning(StandardError):
    """Raised for important warnings, e.g. data truncation while
    inserting."""


class Error(StandardError):
    """Base class of all non-warning error exceptions."""


class InterfaceError(Error):
    """Raised for errors in the database interface layer rather than
    the database itself."""


class DatabaseError(Error):
    """Raised for errors that are related to the database."""


class DataError(DatabaseError):
    """Raised for problems with the processed data: division by zero,
    numeric value out of range, and the like."""


class OperationalError(DatabaseError):
    """Raised for errors in the database's operation that are not
    necessarily under the programmer's control: an unexpected
    disconnect, a missing data source name, a transaction that could
    not be processed, a memory allocation failure during processing,
    etc."""


class IntegrityError(DatabaseError):
    """Raised when the relational integrity of the database is
    affected: a failed foreign key check, a duplicate key, and
    similar."""


class InternalError(DatabaseError):
    """Raised when the database hits an internal error, e.g. an
    invalid cursor or an out-of-sync transaction."""


class ProgrammingError(DatabaseError):
    """Raised for programming errors: table not found or already
    exists, SQL syntax errors, wrong number of parameters, etc."""


class NotSupportedError(DatabaseError):
    """Raised when a method or database API is used that the database
    does not support, e.g. calling .rollback() on a connection without
    transaction support."""
chdb/dbapi/times.py ADDED
@@ -0,0 +1,20 @@
1
+ from time import localtime
2
+ from datetime import date, datetime, time, timedelta
3
+
4
+
5
# DB-API 2.0 (PEP 249) type-constructor aliases: expose the stdlib
# datetime types under the names the spec requires.
Date = date
Time = time
TimeDelta = timedelta
Timestamp = datetime
9
+
10
+
11
def DateFromTicks(ticks):
    """Construct a date from a POSIX timestamp (local time), per PEP 249."""
    year, month, day = localtime(ticks)[:3]
    return date(year, month, day)
13
+
14
+
15
def TimeFromTicks(ticks):
    """Construct a time-of-day from a POSIX timestamp (local time), per PEP 249."""
    hour, minute, second = localtime(ticks)[3:6]
    return time(hour, minute, second)
17
+
18
+
19
def TimestampFromTicks(ticks):
    """Construct a datetime from a POSIX timestamp (local time), per PEP 249."""
    tm = localtime(ticks)
    return datetime(tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec)
chdb/rwabc.py ADDED
@@ -0,0 +1,65 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Any
3
+
4
+
5
class PyReader(ABC):
    """Abstract base for objects that feed Python data into chdb column by column."""

    def __init__(self, data: Any):
        """Store the backing data.

        Args:
            data (Any): Arbitrary data the concrete reader knows how to
                read; its exact type and structure are reader-defined.
        """
        self.data = data

    @abstractmethod
    def read(self, col_names: List[str], count: int) -> List[Any]:
        """Return up to ``count`` rows of the requested columns.

        Args:
            col_names (List[str]): Names of the columns to read.
            count (int): Maximum number of rows to return.

        Returns:
            List[Any]: One sequence of values per requested column.
        """
        ...
29
+
30
+
31
class PyWriter(ABC):
    """Abstract base for objects that collect chdb output columns into blocks."""

    def __init__(self, col_names: List[str], types: List[type], data: Any):
        """Record the output schema and initial data.

        Args:
            col_names (List[str]): Column names.
            types (List[type]): One type per column, parallel to ``col_names``.
            data (Any): Writer-defined initial data; format is not strictly defined.
        """
        self.col_names = col_names
        self.types = types
        self.data = data
        # Accumulated column blocks; filled by write(), consumed by finalize().
        self.blocks = []

    @abstractmethod
    def write(self, col_names: List[str], columns: List[List[Any]]) -> None:
        """Save columns of data to blocks.

        Args:
            col_names (List[str]): Names of the columns being written.
            columns (List[List[Any]]): Column data, one list per column.
        """
        ...

    @abstractmethod
    def finalize(self) -> bytes:
        """Assemble the collected blocks into the final serialized payload.

        Returns:
            bytes: The final serialized data.
        """
        ...
@@ -0,0 +1,3 @@
1
+ from .state import Session
2
+
3
+ __all__ = ["Session"]
chdb/session/state.py ADDED
@@ -0,0 +1,135 @@
1
+ import tempfile
2
+ import shutil
3
+ import warnings
4
+
5
+ import chdb
6
+ from ..state import sqlitelike as chdb_stateful
7
+ from ..state.sqlitelike import StreamingResult
8
+
9
# Module-level bookkeeping for the single allowed active session:
# the live Session instance and the database path it is bound to.
g_session = None
g_session_path = None
11
+
12
+
13
class Session:
    """
    Session will keep the state of query.
    If path is None, it will create a temporary directory and use it as the database path
    and the temporary directory will be removed when the session is closed.
    You can also pass in a path to create a database at that path where will keep your data.

    You can also use a connection string to pass in the path and other parameters.
    Examples:
        - ":memory:" (for in-memory database)
        - "test.db" (for relative path)
        - "file:test.db" (same as above)
        - "/path/to/test.db" (for absolute path)
        - "file:/path/to/test.db" (same as above)
        - "file:test.db?param1=value1&param2=value2" (for relative path with query params)
        - "file::memory:?verbose&log-level=test" (for in-memory database with query params)
        - "///path/to/test.db?param1=value1&param2=value2" (for absolute path)

    Connection string args handling:
        Connection string can contain query params like "file:test.db?param1=value1&param2=value2"
        "param1=value1" will be passed to ClickHouse engine as start up args.

        For more details, see `clickhouse local --help --verbose`
        Some special args handling:
        - "mode=ro" would be "--readonly=1" for clickhouse (read-only mode)

    Important:
        - There can be only one session at a time. If you want to create a new session, you need to close the existing one.
        - Creating a new session will close the existing one.
    """

    def __init__(self, path=None):
        global g_session, g_session_path
        if g_session is not None:
            warnings.warn(
                "There is already an active session. Creating a new session will close the existing one. "
                "It is recommended to close the existing session before creating a new one. "
                f"Closing the existing session {g_session_path}"
            )
            g_session.close()
            g_session_path = None
        if path is None or ":memory:" in path:
            # Ephemeral database: back it with a temp directory that is
            # removed again by cleanup()/close().
            self._cleanup = True
            self._path = tempfile.mkdtemp()
        else:
            self._cleanup = False
            self._path = path
        if chdb.g_udf_path != "":
            self._udf_path = chdb.g_udf_path
            # add udf_path to conn_str here.
            #   - the `user_scripts_path` will be the value of `udf_path`
            #   - the `user_defined_executable_functions_config` will be `user_scripts_path/*.xml`
            # Both of them will be added to the conn_str in the Connection class
            sep = "&" if "?" in self._path else "?"
            self._conn_str = f"{self._path}{sep}udf_path={self._udf_path}"
        else:
            self._udf_path = ""
            self._conn_str = f"{self._path}"
        self._conn = chdb_stateful.Connection(self._conn_str)
        g_session = self
        g_session_path = self._path

    def __del__(self):
        # __del__ may run on a partially constructed instance (e.g. when
        # chdb_stateful.Connection() raised inside __init__) and during
        # interpreter shutdown; it must never propagate an exception.
        try:
            self.close()
        except Exception:
            pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the connection, release the session slot and (for
        temporary sessions) remove the backing directory.

        Safe to call multiple times and on a partially initialized
        instance (getattr guards below).
        """
        if getattr(self, "_cleanup", False):
            self.cleanup()
        if getattr(self, "_conn", None) is not None:
            self._conn.close()
            self._conn = None
        global g_session, g_session_path
        g_session = None
        g_session_path = None

    def cleanup(self):
        """Best-effort teardown: close the connection, remove the temp
        directory and clear the global session slot. Never raises."""
        try:
            if getattr(self, "_conn", None) is not None:
                self._conn.close()
                self._conn = None
            shutil.rmtree(self._path)
            global g_session, g_session_path
            g_session = None
            g_session_path = None
        except Exception:  # noqa
            # Deliberately best-effort: cleanup must never raise, but a
            # bare `except:` would also swallow KeyboardInterrupt/SystemExit.
            pass

    def query(self, sql, fmt="CSV", udf_path=""):
        """
        Execute a query and return the full result in format `fmt`.
        The "Debug" format is not supported here and is downgraded to CSV.
        """
        if fmt == "Debug":
            warnings.warn(
                """Debug format is not supported in Session.query
                Please try use parameters in connection string instead:
                Eg: conn = connect(f"db_path?verbose&log-level=test")"""
            )
            fmt = "CSV"
        return self._conn.query(sql, fmt)

    # alias sql = query
    sql = query

    def send_query(self, sql, fmt="CSV") -> StreamingResult:
        """
        Execute a streaming query; returns a StreamingResult iterator.
        The "Debug" format is not supported here and is downgraded to CSV.
        """
        if fmt == "Debug":
            warnings.warn(
                """Debug format is not supported in Session.query
                Please try use parameters in connection string instead:
                Eg: conn = connect(f"db_path?verbose&log-level=test")"""
            )
            fmt = "CSV"
        return self._conn.send_query(sql, fmt)
chdb/state/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .sqlitelike import connect
2
+
3
+ __all__ = ["connect"]
@@ -0,0 +1,336 @@
1
+ from typing import Optional, Any
2
+ from chdb import _chdb
3
+
4
+ # try import pyarrow if failed, raise ImportError with suggestion
5
+ try:
6
+ import pyarrow as pa # noqa
7
+ except ImportError as e:
8
+ print(f"ImportError: {e}")
9
+ print('Please install pyarrow via "pip install pyarrow"')
10
+ raise ImportError("Failed to import pyarrow") from None
11
+
12
+
13
+ _arrow_format = set({"dataframe", "arrowtable"})
14
+ _process_result_format_funs = {
15
+ "dataframe": lambda x: to_df(x),
16
+ "arrowtable": lambda x: to_arrowTable(x),
17
+ }
18
+
19
+
20
# return pyarrow table
def to_arrowTable(res):
    """Materialize a chdb query result as a pyarrow.Table.

    Imports pyarrow/pandas lazily so the module stays importable
    without them; raises ImportError with an install hint otherwise.
    """
    try:
        import pandas as pd  # noqa
        import pyarrow as pa  # noqa
    except ImportError as e:
        print(f"ImportError: {e}")
        print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
        raise ImportError("Failed to import pyarrow or pandas") from None
    if len(res):
        # Result bytes are an Arrow IPC file; read all record batches.
        return pa.RecordBatchFileReader(res.bytes()).read_all()
    # Empty result: an empty table with an empty schema.
    return pa.Table.from_batches([], schema=pa.schema([]))
34
+
35
+
36
# return pandas dataframe
def to_df(r):
    """Convert a chdb query result to a pandas DataFrame via Arrow."""
    return to_arrowTable(r).to_pandas(use_threads=True)
41
+
42
+
43
class StreamingResult:
    """Iterator over the chunks of a streaming chdb query result."""

    def __init__(self, c_result, conn, result_func):
        self._result = c_result          # low-level streaming handle
        self._result_func = result_func  # per-chunk post-processing
        self._conn = conn                # connection that owns the stream
        self._exhausted = False          # set once the stream is drained

    def fetch(self):
        """Fetch next chunk of streaming results"""
        if self._exhausted:
            return None
        try:
            chunk = self._conn.streaming_fetch_result(self._result)
            if chunk is None or chunk.rows_read() == 0:
                # Engine signals end-of-stream with None or an empty chunk.
                self._exhausted = True
                return None
            return self._result_func(chunk)
        except Exception as e:
            self._exhausted = True
            raise RuntimeError(f"Streaming query failed: {str(e)}") from e

    def __iter__(self):
        return self

    def __next__(self):
        chunk = None if self._exhausted else self.fetch()
        if chunk is None:
            self._exhausted = True
            raise StopIteration
        return chunk

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass

    def cancel(self):
        """Mark the stream exhausted and ask the engine to cancel it."""
        self._exhausted = True
        try:
            self._conn.streaming_cancel_query(self._result)
        except Exception as e:
            raise RuntimeError(f"Failed to cancel streaming query: {str(e)}") from e
92
+
93
+
94
class Connection:
    """Stateful connection to a chdb engine instance."""

    def __init__(self, connection_string: str):
        self._cursor: Optional[Cursor] = None
        self._conn = _chdb.connect(connection_string)

    def cursor(self) -> "Cursor":
        """Create (and remember) a cursor bound to this connection."""
        self._cursor = Cursor(self._conn)
        return self._cursor

    def query(self, query: str, format: str = "CSV") -> Any:
        """Run `query` to completion and return the materialized result.

        "dataframe"/"arrowtable" formats are fetched as Arrow and
        post-processed into pandas/pyarrow objects.
        """
        fmt_key = format.lower()
        post_process = _process_result_format_funs.get(fmt_key, lambda x: x)
        if fmt_key in _arrow_format:
            format = "Arrow"
        return post_process(self._conn.query(query, format))

    def send_query(self, query: str, format: str = "CSV") -> StreamingResult:
        """Start a streaming query and return a StreamingResult iterator."""
        fmt_key = format.lower()
        post_process = _process_result_format_funs.get(fmt_key, lambda x: x)
        if fmt_key in _arrow_format:
            format = "Arrow"
        stream_handle = self._conn.send_query(query, format)
        return StreamingResult(stream_handle, self._conn, post_process)

    def close(self) -> None:
        """Close the open cursor (if any), then the underlying connection."""
        if self._cursor:
            self._cursor.close()
        self._conn.close()
127
+
128
+
129
class Cursor:
    """DB-API-2.0-style cursor over a low-level chdb connection.

    execute() fetches the whole result eagerly: the engine returns a
    newline-delimited JSON payload (line 1 = column names, line 2 =
    column types, remaining lines = rows) which is parsed into a list
    of Python tuples held in memory and served by the fetch* methods.
    """

    def __init__(self, connection):
        # `connection` is the low-level engine connection object.
        self._conn = connection
        self._cursor = self._conn.cursor()
        # List of row tuples buffered by execute(), or None when there
        # is no (non-empty) result.
        self._current_table: Optional[list] = None
        # Index of the next row fetchone() will return.
        self._current_row: int = 0

    def execute(self, query: str) -> None:
        """Run `query` and buffer the entire result set as row tuples.

        Raises:
            Exception: if the engine reports an error, or if the result
                payload is not valid JSON.
        """
        self._cursor.execute(query)
        result_mv = self._cursor.get_memview()
        if self._cursor.has_error():
            raise Exception(self._cursor.error_message())
        if self._cursor.data_size() == 0:
            # Empty result: reset all cached state so fetch* return nothing.
            self._current_table = None
            self._current_row = 0
            self._column_names = []
            self._column_types = []
            return

        # Parse JSON data
        json_data = result_mv.tobytes().decode("utf-8")
        import json

        try:
            # First line contains column names
            # Second line contains column types
            # Following lines contain data
            lines = json_data.strip().split("\n")
            if len(lines) < 2:
                # Header incomplete: treat as an empty result.
                self._current_table = None
                self._current_row = 0
                self._column_names = []
                self._column_types = []
                return

            self._column_names = json.loads(lines[0])
            self._column_types = json.loads(lines[1])

            # Convert data rows
            rows = []
            for line in lines[2:]:
                if not line.strip():
                    continue
                row_data = json.loads(line)
                converted_row = []
                for val, type_info in zip(row_data, self._column_types):
                    # Handle NULL values first
                    if val is None:
                        converted_row.append(None)
                        continue

                    # Basic type conversion
                    try:
                        if type_info.startswith("Int") or type_info.startswith("UInt"):
                            converted_row.append(int(val))
                        elif type_info.startswith("Float"):
                            converted_row.append(float(val))
                        elif type_info == "Bool":
                            converted_row.append(bool(val))
                        elif type_info == "String" or type_info == "FixedString":
                            converted_row.append(str(val))
                        elif type_info.startswith("DateTime"):
                            from datetime import datetime

                            # Check if the value is numeric (timestamp)
                            val_str = str(val)
                            if val_str.replace(".", "").isdigit():
                                # NOTE(review): fromtimestamp uses local
                                # time — confirm against engine timezone.
                                converted_row.append(datetime.fromtimestamp(float(val)))
                            else:
                                # Handle datetime string formats
                                if "." in val_str:  # Has microseconds
                                    converted_row.append(
                                        datetime.strptime(
                                            val_str, "%Y-%m-%d %H:%M:%S.%f"
                                        )
                                    )
                                else:  # No microseconds
                                    converted_row.append(
                                        datetime.strptime(val_str, "%Y-%m-%d %H:%M:%S")
                                    )
                        elif type_info.startswith("Date"):
                            from datetime import date, datetime

                            # Check if the value is numeric (days since epoch)
                            # NOTE(review): days are scaled to seconds for
                            # date.fromtimestamp, which is local-time based —
                            # confirm against the engine's Date encoding.
                            val_str = str(val)
                            if val_str.isdigit():
                                converted_row.append(
                                    date.fromtimestamp(float(val) * 86400)
                                )
                            else:
                                # Handle date string format
                                converted_row.append(
                                    datetime.strptime(val_str, "%Y-%m-%d").date()
                                )
                        else:
                            # For unsupported types, keep as string
                            converted_row.append(str(val))
                    except (ValueError, TypeError):
                        # If conversion fails, keep original value as string
                        converted_row.append(str(val))
                rows.append(tuple(converted_row))

            self._current_table = rows
            self._current_row = 0

        except json.JSONDecodeError as e:
            raise Exception(f"Failed to parse JSON data: {e}")

    def commit(self) -> None:
        """Delegate commit to the underlying low-level cursor."""
        self._cursor.commit()

    def fetchone(self) -> Optional[tuple]:
        """Return the next buffered row, or None when exhausted."""
        if not self._current_table or self._current_row >= len(self._current_table):
            return None

        # Now self._current_table is a list of row tuples
        row = self._current_table[self._current_row]
        self._current_row += 1
        return row

    def fetchmany(self, size: int = 1) -> tuple:
        """Return up to `size` rows as a tuple (possibly empty)."""
        if not self._current_table:
            return tuple()

        rows = []
        for _ in range(size):
            if (row := self.fetchone()) is None:
                break
            rows.append(row)
        return tuple(rows)

    def fetchall(self) -> tuple:
        """Return all remaining rows as a tuple (possibly empty)."""
        if not self._current_table:
            return tuple()

        remaining_rows = []
        while (row := self.fetchone()) is not None:
            remaining_rows.append(row)
        return tuple(remaining_rows)

    def close(self) -> None:
        """Close the underlying low-level cursor."""
        self._cursor.close()

    def __iter__(self):
        return self

    def __next__(self) -> tuple:
        # Iterating the cursor yields rows until the buffer is drained.
        row = self.fetchone()
        if row is None:
            raise StopIteration
        return row

    def column_names(self) -> list:
        """Return a list of column names from the last executed query"""
        return self._column_names if hasattr(self, "_column_names") else []

    def column_types(self) -> list:
        """Return a list of column types from the last executed query"""
        return self._column_types if hasattr(self, "_column_types") else []

    @property
    def description(self) -> list:
        """
        Return a description of the columns as per DB-API 2.0
        Returns a list of 7-item tuples, each containing:
        (name, type_code, display_size, internal_size, precision, scale, null_ok)
        where only name and type_code are provided
        """
        if not hasattr(self, "_column_names") or not self._column_names:
            return []

        return [
            (name, type_info, None, None, None, None, None)
            for name, type_info in zip(self._column_names, self._column_types)
        ]
304
+
305
+
306
def connect(connection_string: str = ":memory:") -> Connection:
    """
    Create a connection to a chDB background server.

    NOTE(review): every call constructs a brand-new Connection object —
    nothing here caches or deduplicates by connection string. Only one
    open connection is allowed per process; use `close` to close the
    connection. You can use the connection object to create cursor
    objects via its `cursor` method.

    Args:
        connection_string (str, optional): Connection string. Defaults to ":memory:".
        Also support file path like:
        - ":memory:" (for in-memory database)
        - "test.db" (for relative path)
        - "file:test.db" (same as above)
        - "/path/to/test.db" (for absolute path)
        - "file:/path/to/test.db" (same as above)
        - "file:test.db?param1=value1&param2=value2" (for relative path with query params)
        - "file::memory:?verbose&log-level=test" (for in-memory database with query params)
        - "///path/to/test.db?param1=value1&param2=value2" (for absolute path)

    Connection string args handling:
        Connection string can contain query params like "file:test.db?param1=value1&param2=value2"
        "param1=value1" will be passed to ClickHouse engine as start up args.

        For more details, see `clickhouse local --help --verbose`
        Some special args handling:
        - "mode=ro" would be "--readonly=1" for clickhouse (read-only mode)

    Returns:
        Connection: Connection object
    """
    return Connection(connection_string)
chdb/udf/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .udf import chdb_udf, generate_udf
2
+
3
+ __all__ = ["chdb_udf", "generate_udf"]