chdb-3.7.1-cp38-abi3-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -0,0 +1,544 @@
+ import os
+ import tempfile
+ from io import BytesIO
+ import re
+ import pandas as pd
+ import pyarrow as pa
+ import pyarrow.parquet  # not auto-imported by `import pyarrow`; needed for pa.parquet.write_table below
+ from chdb import query as chdb_query
+
+
+ class Table:
+     """Wrapper for multiple data formats enabling SQL queries on DataFrames, Parquet files, and Arrow tables.
+
+     The Table class provides a unified interface for querying different data formats using SQL.
+     It supports pandas DataFrames, Parquet files (both on disk and in memory), and PyArrow Tables.
+     All data is internally converted to Parquet format for efficient querying with chDB.
+
+     Args:
+         parquet_path (str, optional): Path to an existing Parquet file
+         temp_parquet_path (str, optional): Path to a temporary Parquet file
+         parquet_memoryview (memoryview, optional): Parquet data in memory as memoryview
+         dataframe (pd.DataFrame, optional): pandas DataFrame to wrap
+         arrow_table (pa.Table, optional): PyArrow Table to wrap
+         use_memfd (bool, optional): Use memfd_create for temporary files (Linux only). Defaults to False.
+
+     Examples:
+         >>> # Create from pandas DataFrame
+         >>> import pandas as pd
+         >>> df = pd.DataFrame({'id': [1, 2], 'name': ['Alice', 'Bob']})
+         >>> table = Table(dataframe=df)
+         >>> result = table.query("SELECT * FROM __table__ WHERE id > 1")
+
+         >>> # Create from Parquet file
+         >>> table = Table(parquet_path="data.parquet")
+         >>> result = table.query("SELECT COUNT(*) FROM __table__")
+
+         >>> # Multi-table queries
+         >>> table1 = Table(dataframe=df1)
+         >>> table2 = Table(dataframe=df2)
+         >>> result = Table.queryStatic(
+         ...     "SELECT * FROM __table1__ JOIN __table2__ ON __table1__.id = __table2__.id",
+         ...     table1=table1, table2=table2
+         ... )
+     """
+
+     def __init__(
+         self,
+         parquet_path: str = None,
+         temp_parquet_path: str = None,
+         parquet_memoryview: memoryview = None,
+         dataframe: pd.DataFrame = None,
+         arrow_table: pa.Table = None,
+         use_memfd: bool = False,
+     ):
+         """Initialize a Table object with one of the supported data formats.
+
+         Only one data source should be provided. The Table will wrap the provided data
+         and enable SQL querying capabilities.
+
+         Args:
+             parquet_path (str, optional): Path to existing Parquet file
+             temp_parquet_path (str, optional): Path to temporary Parquet file
+             parquet_memoryview (memoryview, optional): Parquet data in memory
+             dataframe (pd.DataFrame, optional): pandas DataFrame to wrap
+             arrow_table (pa.Table, optional): PyArrow Table to wrap
+             use_memfd (bool, optional): Use memory-based file descriptors on Linux
+         """
+         self._parquet_path = parquet_path
+         self._temp_parquet_path = temp_parquet_path
+         self._parquet_memoryview = parquet_memoryview
+         self._dataframe = dataframe
+         self._arrow_table = arrow_table
+         self.use_memfd = use_memfd
+         self._rows_read = 0
+         self._bytes_read = 0
+         self._elapsed = 0
+
+     def __del__(self):
+         if self._temp_parquet_path is not None:
+             try:
+                 os.remove(self._temp_parquet_path)
+             except OSError:
+                 pass
+
+     def rows_read(self):
+         """Get the number of rows read by the last query operation.
+
+         Returns:
+             int: Number of rows processed by the last query
+         """
+         return self._rows_read
+
+     def bytes_read(self):
+         """Get the number of bytes read by the last query operation.
+
+         Returns:
+             int: Number of bytes processed by the last query
+         """
+         return self._bytes_read
+
+     def elapsed(self):
+         """Get the elapsed time of the last query operation.
+
+         Returns:
+             float: Query execution time in seconds
+         """
+         return self._elapsed
+
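+     # Illustrative sketch of the stats accessors above (df is hypothetical):
+     #   t = Table(dataframe=df).query("SELECT * FROM __table__")
+     #   print(t.rows_read(), t.bytes_read(), t.elapsed())
+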
+     def to_pandas(self) -> pd.DataFrame:
+         """Convert the Table data to a pandas DataFrame.
+
+         This method handles conversion from various internal formats (Parquet files,
+         memory buffers, Arrow tables) to a unified pandas DataFrame representation.
+
+         Returns:
+             pd.DataFrame: The table data as a pandas DataFrame
+
+         Raises:
+             ValueError: If no data source is available in the Table object
+
+         Example:
+             >>> table = Table(dataframe=df)
+             >>> result_table = table.query("SELECT * FROM __table__ LIMIT 5")
+             >>> df_result = result_table.to_pandas()
+             >>> print(df_result)
+         """
+         if self._dataframe is None:
+             if self._arrow_table is not None:
+                 return self._arrow_table.to_pandas()
+             elif self._parquet_memoryview is not None:
+                 # wrap the raw bytes in a readable buffer
+                 pq_reader = BytesIO(self._parquet_memoryview.tobytes())
+                 return pandas_read_parquet(pq_reader)
+             elif self._parquet_path is not None:
+                 return pandas_read_parquet(self._parquet_path)
+             elif self._temp_parquet_path is not None:
+                 return pandas_read_parquet(self._temp_parquet_path)
+             else:
+                 raise ValueError("No data buffer in Table object")
+         return self._dataframe
+
+     def flush_to_disk(self):
+         """Flush in-memory data to disk as a temporary Parquet file.
+
+         This method converts in-memory data (DataFrame, Arrow table, or memory buffer)
+         to a temporary Parquet file on disk. This can be useful for memory management
+         or when working with large datasets.
+
+         The method does nothing if the data is already stored on disk.
+
+         Raises:
+             ValueError: If the Table object contains no data to flush
+
+         Example:
+             >>> table = Table(dataframe=large_df)
+             >>> table.flush_to_disk()  # Frees memory, keeps data accessible
+         """
+         if self._parquet_path is not None or self._temp_parquet_path is not None:
+             return
+
+         if self._dataframe is not None:
+             self._df_to_disk(self._dataframe)
+             self._dataframe = None
+         elif self._arrow_table is not None:
+             self._arrow_table_to_disk(self._arrow_table)
+             self._arrow_table = None
+         elif self._parquet_memoryview is not None:
+             self._memoryview_to_disk(self._parquet_memoryview)
+             self._parquet_memoryview = None
+         else:
+             raise ValueError("No data in Table object")
+
+     def _df_to_disk(self, df):
+         with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
+             df.to_parquet(tmp)
+             self._temp_parquet_path = tmp.name
+
+     def _arrow_table_to_disk(self, arrow_table):
+         with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
+             pa.parquet.write_table(arrow_table, tmp.name)
+             self._temp_parquet_path = tmp.name
+
+     def _memoryview_to_disk(self, memoryview):
+         # copy memoryview contents to a temp file
+         with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
+             tmp.write(memoryview.tobytes())
+             self._temp_parquet_path = tmp.name
+
+     def __repr__(self):
+         return repr(self.to_pandas())
+
+     def __str__(self):
+         return str(self.to_pandas())
+
+     def query(self, sql: str, **kwargs) -> "Table":
+         """Execute a SQL query on the current Table and return a new Table with the results.
+
+         This method allows you to run SQL queries on the table data using chDB.
+         The table is referenced as '__table__' in the SQL statement.
+
+         Args:
+             sql (str): SQL query string. Must reference the table as '__table__'
+             **kwargs: Additional arguments passed to the chDB query engine
+
+         Returns:
+             Table: New Table object containing the query results
+
+         Raises:
+             ValueError: If the SQL doesn't contain a '__table__' reference or the Table is not initialized
+
+         Examples:
+             >>> table = Table(dataframe=df)
+             >>> # Filter rows
+             >>> result = table.query("SELECT * FROM __table__ WHERE age > 25")
+             >>>
+             >>> # Aggregate data
+             >>> summary = table.query("SELECT COUNT(*), AVG(salary) FROM __table__")
+             >>>
+             >>> # Complex operations
+             >>> processed = table.query(
+             ...     "SELECT name, age * 2 as double_age FROM __table__ ORDER BY age DESC"
+             ... )
+         """
+         self._validate_sql(sql)
+
+         if self._parquet_path is not None:
+             # running the chDB query directly against an on-disk Parquet file is the fastest path
+             return self._query_on_path(self._parquet_path, sql, **kwargs)
+         elif self._temp_parquet_path is not None:
+             return self._query_on_path(self._temp_parquet_path, sql, **kwargs)
+         elif self._parquet_memoryview is not None:
+             return self.queryParquetBuffer(sql, **kwargs)
+         elif self._dataframe is not None:
+             return self.queryDF(sql, **kwargs)
+         elif self._arrow_table is not None:
+             return self.queryArrowTable(sql, **kwargs)
+         else:
+             raise ValueError("Table object is not initialized correctly")
+
+     # alias: sql = query
+     sql = query
+
+     def show(self):
+         """Display the Table data by printing the pandas DataFrame representation.
+
+         This is a convenience method for quickly viewing the table contents.
+         Equivalent to print(table.to_pandas()).
+
+         Example:
+             >>> table = Table(dataframe=df)
+             >>> table.show()
+                id   name
+             0   1  Alice
+             1   2    Bob
+         """
+         print(self.to_pandas())
+
+     def _query_on_path(self, path, sql, **kwargs):
+         new_sql = sql.replace("__table__", f'file("{path}", Parquet)')
+         res = chdb_query(new_sql, "Parquet", **kwargs)
+         tbl = Table(parquet_memoryview=res.get_memview())
+         tbl._rows_read = res.rows_read()
+         tbl._bytes_read = res.bytes_read()
+         tbl._elapsed = res.elapsed()
+         return tbl
+
+     def _validate_sql(self, sql):
+         if "__table__" not in sql:
+             raise ValueError("SQL should always contain `FROM __table__`")
+
+     def queryParquetBuffer(self, sql: str, **kwargs) -> "Table":
+         """Run a SQL query against the in-memory Parquet buffer via a file descriptor."""
+         if "__table__" not in sql:
+             raise ValueError("SQL should always contain `FROM __table__`")
+         if self._parquet_memoryview is None:
+             raise ValueError("Parquet buffer is None")
+
+         temp_path = None
+         parquet_fd = -1
+         if self.use_memfd:
+             parquet_fd = memfd_create("parquet_buffer")
+         # if memfd_create failed, fall back to a temp file descriptor for the memoryview
+         if parquet_fd == -1:
+             parquet_fd, temp_path = tempfile.mkstemp()
+         ffd = os.fdopen(parquet_fd, "wb")
+         ffd.write(self._parquet_memoryview.tobytes())
+         ffd.flush()
+         ret = self._run_on_temp(parquet_fd, temp_path, sql=sql, fmt="Parquet", **kwargs)
+         ffd.close()
+         if temp_path is not None:
+             os.remove(temp_path)
+         return ret
+
+     def queryArrowTable(self, sql: str, **kwargs) -> "Table":
+         """Run a SQL query against the in-memory Arrow table via a file descriptor."""
+         if "__table__" not in sql:
+             raise ValueError("SQL should always contain `FROM __table__`")
+         if self._arrow_table is None:
+             raise ValueError("Arrow table is None")
+
+         temp_path = None
+         arrow_fd = -1
+         if self.use_memfd:
+             arrow_fd = memfd_create("arrow_buffer")
+         if arrow_fd == -1:
+             arrow_fd, temp_path = tempfile.mkstemp()
+         ffd = os.fdopen(arrow_fd, "wb")
+         with pa.RecordBatchFileWriter(ffd, self._arrow_table.schema) as writer:
+             writer.write_table(self._arrow_table)
+         ffd.flush()
+         ret = self._run_on_temp(arrow_fd, temp_path, sql=sql, fmt="Arrow", **kwargs)
+         ffd.close()
+         if temp_path is not None:
+             os.remove(temp_path)
+         return ret
+
+     def queryDF(self, sql: str, **kwargs) -> "Table":
+         """Run a SQL query against the in-memory DataFrame via a file descriptor."""
+         if "__table__" not in sql:
+             raise ValueError("SQL should always contain `FROM __table__`")
+         if self._dataframe is None:
+             raise ValueError("Dataframe is None")
+
+         temp_path = None
+         parquet_fd = -1
+         if self.use_memfd:
+             parquet_fd = memfd_create()
+         if parquet_fd == -1:
+             parquet_fd, temp_path = tempfile.mkstemp()
+         ffd = os.fdopen(parquet_fd, "wb")
+         self._dataframe.to_parquet(ffd, engine="pyarrow", compression=None)
+         ffd.flush()
+         ret = self._run_on_temp(parquet_fd, temp_path, sql=sql, fmt="Parquet", **kwargs)
+         ffd.close()
+         if temp_path is not None:
+             os.remove(temp_path)
+         return ret
+
+     @staticmethod
+     def queryStatic(sql: str, **kwargs) -> "Table":
+         """Execute SQL query across multiple Table objects.
+
+         This static method enables complex queries involving multiple tables by referencing
+         them as '__tablename__' in the SQL and passing them as keyword arguments.
+
+         Args:
+             sql (str): SQL query with table references as '__name__' patterns
+             **kwargs: Table objects referenced in the SQL, where each key matches the table name.
+                 Can also include pandas DataFrames, which will be auto-converted to Tables
+
+         Returns:
+             Table: New Table object containing the query results
+
+         Raises:
+             ValueError: If referenced table names are missing from kwargs or have invalid types
+
+         Examples:
+             >>> users = Table(dataframe=users_df)
+             >>> orders = Table(dataframe=orders_df)
+             >>>
+             >>> # Join two tables
+             >>> result = Table.queryStatic(
+             ...     "SELECT u.name, COUNT(o.id) as order_count "
+             ...     "FROM __users__ u LEFT JOIN __orders__ o ON u.id = o.user_id "
+             ...     "GROUP BY u.name",
+             ...     users=users, orders=orders
+             ... )
+             >>>
+             >>> # Works with pandas DataFrames directly
+             >>> result = Table.queryStatic(
+             ...     "SELECT * FROM __df1__ UNION ALL SELECT * FROM __df2__",
+             ...     df1=dataframe1, df2=dataframe2
+             ... )
+             >>>
+             >>> # Complex multi-table operations
+             >>> analytics = Table.queryStatic(
+             ...     "SELECT p.category, AVG(o.amount) as avg_order "
+             ...     "FROM __products__ p "
+             ...     "JOIN __order_items__ oi ON p.id = oi.product_id "
+             ...     "JOIN __orders__ o ON oi.order_id = o.id "
+             ...     "GROUP BY p.category ORDER BY avg_order DESC",
+             ...     products=products_table,
+             ...     order_items=order_items_table,
+             ...     orders=orders_table
+             ... )
+         """
+         ansiTablePattern = re.compile(r"__([a-zA-Z][a-zA-Z0-9_]*)__")
+         temp_paths = []
+         ffds = []
+
+         def replace_table_name(match):
+             tableName = match.group(1)
+             if tableName not in kwargs:
+                 raise ValueError(f"Table {tableName} should be passed as a parameter")
+
+             tbl = kwargs[tableName]
+             # if tbl is a DataFrame, convert it to a Table
+             if isinstance(tbl, pd.DataFrame):
+                 tbl = Table(dataframe=tbl)
+             elif not isinstance(tbl, Table):
+                 raise ValueError(
+                     f"Table {tableName} should be an instance of Table or DataFrame")
+
+             if tbl._parquet_path is not None:
+                 return f'file("{tbl._parquet_path}", Parquet)'
+
+             if tbl._temp_parquet_path is not None:
+                 return f'file("{tbl._temp_parquet_path}", Parquet)'
+
+             temp_path = None
+             data_fd = -1
+
+             if tbl.use_memfd:
+                 data_fd = memfd_create()
+
+             if data_fd == -1:
+                 data_fd, temp_path = tempfile.mkstemp()
+                 temp_paths.append(temp_path)
+
+             ffd = os.fdopen(data_fd, "wb")
+             ffds.append(ffd)
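+             # the file object must stay open while the query runs: the rewritten
+             # SQL references it as /dev/fd/<fd>, and it is closed only after
+             # chdb_query returns (see the cleanup loop below)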
+
+             if tbl._parquet_memoryview is not None:
+                 ffd.write(tbl._parquet_memoryview.tobytes())
+                 ffd.flush()
+                 os.lseek(data_fd, 0, os.SEEK_SET)
+                 return f'file("/dev/fd/{data_fd}", Parquet)'
+
+             if tbl._dataframe is not None:
+                 ffd.write(tbl._dataframe.to_parquet(engine="pyarrow", compression=None))
+                 ffd.flush()
+                 os.lseek(data_fd, 0, os.SEEK_SET)
+                 return f'file("/dev/fd/{data_fd}", Parquet)'
+
+             if tbl._arrow_table is not None:
+                 with pa.RecordBatchFileWriter(ffd, tbl._arrow_table.schema) as writer:
+                     writer.write_table(tbl._arrow_table)
+                 ffd.flush()
+                 os.lseek(data_fd, 0, os.SEEK_SET)
+                 return f'file("/dev/fd/{data_fd}", Arrow)'
+
+             raise ValueError(f"Table {tableName} is not initialized correctly")
+
+         sql = ansiTablePattern.sub(replace_table_name, sql)
+         res = chdb_query(sql, "Parquet")
+
+         for fd in ffds:
+             fd.close()
+
+         for tmp_path in temp_paths:
+             os.remove(tmp_path)
+
+         tbl = Table(parquet_memoryview=res.get_memview())
+         tbl._rows_read = res.rows_read()
+         tbl._bytes_read = res.bytes_read()
+         tbl._elapsed = res.elapsed()
+         return tbl
+
+     def _run_on_temp(
+         self,
+         fd: int,
+         temp_path: str = None,
+         sql: str = None,
+         fmt: str = "Parquet",
+         **kwargs,
+     ) -> "Table":
+         # replace "__table__" with file("temp_path", Parquet) or file("/dev/fd/{parquet_fd}", Parquet)
+         if temp_path is not None:
+             new_sql = sql.replace("__table__", f'file("{temp_path}", {fmt})')
+         else:
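+             # rewind the descriptor so chdb reads the buffer from the start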
+             os.lseek(fd, 0, os.SEEK_SET)
+             new_sql = sql.replace("__table__", f'file("/dev/fd/{fd}", {fmt})')
+         res = chdb_query(new_sql, "Parquet", **kwargs)
+         tbl = Table(parquet_memoryview=res.get_memview())
+         tbl._rows_read = res.rows_read()
+         tbl._bytes_read = res.bytes_read()
+         tbl._elapsed = res.elapsed()
+         return tbl
+
+
+ def pandas_read_parquet(path) -> pd.DataFrame:
+     """Read a Parquet file into a pandas DataFrame.
+
+     This is a convenience wrapper around pandas.read_parquet() for consistency
+     with the chdb.dataframe module interface.
+
+     Args:
+         path: File path or file-like object to read from
+
+     Returns:
+         pd.DataFrame: The loaded DataFrame
+     """
+     return pd.read_parquet(path)
+
+
+ def memfd_create(name: str = None) -> int:
+     """Create an in-memory file descriptor using the memfd_create system call.
+
+     This function attempts to use the Linux-specific memfd_create(2) system call
+     to create a file descriptor that refers to an anonymous memory-backed file.
+     This provides better performance for temporary data operations.
+
+     Args:
+         name (str, optional): Name for the memory file (for debugging).
+             Defaults to None, in which case a generic name is substituted.
+
+     Returns:
+         int: File descriptor on success, -1 on failure or if not supported
+
+     Note:
+         This function only works on Linux 3.17 or newer with glibc 2.27 or newer.
+         On other systems, or if the call fails, it returns -1 and callers should
+         fall back to regular temporary files.
+
+     Example:
+         >>> fd = memfd_create("temp_data")
+         >>> if fd != -1:
+         ...     # Use memory-based file descriptor
+         ...     with os.fdopen(fd, 'wb') as f:
+         ...         f.write(data)
+         ... else:
+         ...     # Fall back to regular temp file
+         ...     fd, path = tempfile.mkstemp()
+     """
+     if hasattr(os, "memfd_create"):
+         try:
+             # os.memfd_create() requires a str name; substitute one when None is passed
+             fd = os.memfd_create(name or "chdb_tmp", flags=os.MFD_CLOEXEC)
+             return fd
+         except OSError:
+             return -1
+     return -1
+
+
+ if __name__ == "__main__":
+     import argparse
+
+     parser = argparse.ArgumentParser(description="Run SQL on a Parquet file")
+     parser.add_argument("parquet_path", type=str, help="path to the Parquet file")
+     parser.add_argument("sql", type=str, help="SQL to run")
+     parser.add_argument(
+         "--use-memfd",
+         action="store_true",
+         help="use memfd_create to create file descriptors",
+     )
+     args = parser.parse_args()
+
+     table = Table(parquet_path=args.parquet_path, use_memfd=args.use_memfd)
+     print(table.query(args.sql))
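
A hedged invocation sketch for the CLI entry point above; the module's file name is not shown in this diff, so "query.py" is a stand-in:

    python query.py data.parquet "SELECT count(*) FROM __table__" --use-memfd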
chdb/dbapi/__init__.py ADDED
@@ -0,0 +1,134 @@
+ from .constants import FIELD_TYPE
+ from . import connections as _orig_conn
+ from .. import chdb_version
+
+ if len(chdb_version) > 3 and chdb_version[3] is not None:
+     VERSION_STRING = "%s.%s.%s_%s" % chdb_version
+ else:
+     VERSION_STRING = "%s.%s.%s" % chdb_version[:3]
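+ # e.g. a three-part version tuple renders as "X.Y.Z"; a non-None fourth element
+ # is appended with an underscore, "X.Y.Z_build" (values illustrative)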
+
+ threadsafety = 1
+ apilevel = "2.0"
+ paramstyle = "format"
+
+
+ class DBAPISet(frozenset):
+     """Extended frozenset for DB-API 2.0 type comparison.
+
+     This class extends frozenset to support DB-API 2.0 type comparison semantics.
+     It allows for flexible type checking where individual items can be compared
+     against the set using both equality and inequality operators.
+
+     This is used for type constants like STRING, BINARY, NUMBER, etc. to enable
+     comparisons like "field_type == STRING" where field_type is a single type value.
+
+     Examples:
+         >>> string_types = DBAPISet([FIELD_TYPE.STRING, FIELD_TYPE.VAR_STRING])
+         >>> FIELD_TYPE.STRING == string_types  # Returns True
+         >>> FIELD_TYPE.INT != string_types     # Returns True
+         >>> FIELD_TYPE.BLOB in string_types    # Returns False
+     """
+
+     def __ne__(self, other):
+         """Check inequality with flexible type comparison.
+
+         Args:
+             other: Value to compare against this set
+
+         Returns:
+             bool: True if other is not in this set (for non-set types) or
+                 True if the sets are not equal (for set types)
+         """
+         if isinstance(other, (set, frozenset)):
+             return frozenset.__ne__(self, other)
+         else:
+             return other not in self
+
+     def __eq__(self, other):
+         """Check equality with flexible type comparison.
+
+         Args:
+             other: Value to compare against this set
+
+         Returns:
+             bool: True if other is in this set (for non-set types) or
+                 True if the sets are equal (for set types)
+         """
+         if isinstance(other, (set, frozenset)):
+             return frozenset.__eq__(self, other)
+         else:
+             return other in self
+
+     def __hash__(self):
+         """Return the hash value for the set.
+
+         Returns:
+             int: Hash value of the underlying frozenset
+         """
+         return frozenset.__hash__(self)
+
+
+ # TODO: these type sets come from PEP 249; clarify their intended meaning and usage
+ # https://www.python.org/dev/peps/pep-0249/#string
+ STRING = DBAPISet([FIELD_TYPE.ENUM, FIELD_TYPE.STRING,
+                    FIELD_TYPE.VAR_STRING])
+ BINARY = DBAPISet([FIELD_TYPE.BLOB, FIELD_TYPE.LONG_BLOB,
+                    FIELD_TYPE.MEDIUM_BLOB, FIELD_TYPE.TINY_BLOB])
+ NUMBER = DBAPISet([FIELD_TYPE.DECIMAL, FIELD_TYPE.DOUBLE, FIELD_TYPE.FLOAT,
+                    FIELD_TYPE.INT24, FIELD_TYPE.LONG, FIELD_TYPE.LONGLONG,
+                    FIELD_TYPE.TINY, FIELD_TYPE.YEAR])
+ DATE = DBAPISet([FIELD_TYPE.DATE, FIELD_TYPE.NEWDATE])
+ TIME = DBAPISet([FIELD_TYPE.TIME])
+ TIMESTAMP = DBAPISet([FIELD_TYPE.TIMESTAMP, FIELD_TYPE.DATETIME])
+ DATETIME = TIMESTAMP
+ ROWID = DBAPISet()
+
+
+ def Binary(x):
+     """Return x as a binary type.
+
+     This function converts the input to bytes for use with binary
+     database fields, following the DB-API 2.0 specification.
+
+     Args:
+         x: Input data to convert to binary
+
+     Returns:
+         bytes: The input converted to bytes
+     """
+     return bytes(x)
+
+
+ def Connect(*args, **kwargs):
+     """
+     Connect to the database; see connections.Connection.__init__() for
+     more information.
+     """
+     from .connections import Connection
+     return Connection(*args, **kwargs)
+
+
+ if _orig_conn.Connection.__init__.__doc__ is not None:
+     Connect.__doc__ = _orig_conn.Connection.__init__.__doc__
+ del _orig_conn
+
+
+ def get_client_info():
+     """Get client version information.
+
+     Returns the chDB client version as a string, for MySQLdb compatibility.
+
+     Returns:
+         str: Version string in the format 'major.minor.patch', with an extra
+             '.<build>' component when the version tuple carries one
+     """
+     version = chdb_version
+     if len(chdb_version) > 3 and chdb_version[3] is None:
+         version = chdb_version[:3]
+     return '.'.join(map(str, version))
+
+
+ connect = Connection = Connect
+
+ NULL = "NULL"
+
+ __version__ = get_client_info()
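
A minimal DB-API usage sketch for this module. This is hedged: connect() is defined above, but the Cursor methods are not shown in this diff and are assumed to follow the standard PEP 249 surface implied by apilevel = "2.0":

    from chdb import dbapi

    conn = dbapi.connect()      # alias for Connect, defined above
    cur = conn.cursor()         # assumed PEP 249 cursor factory
    cur.execute("SELECT 1")     # assumed PEP 249 execute
    print(cur.fetchall())       # assumed PEP 249 fetchall
    cur.close()
    conn.close()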