chdb 3.2.0__cp313-cp313-macosx_10_15_x86_64.whl → 3.3.0__cp313-cp313-macosx_10_15_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chdb might be problematic. Click here for more details.
- chdb/__init__.py +1 -1
- chdb/_chdb.cpython-313-darwin.so +0 -0
- {chdb-3.2.0.dist-info → chdb-3.3.0.dist-info}/METADATA +84 -11
- {chdb-3.2.0.dist-info → chdb-3.3.0.dist-info}/RECORD +7 -7
- {chdb-3.2.0.dist-info → chdb-3.3.0.dist-info}/WHEEL +1 -1
- {chdb-3.2.0.dist-info → chdb-3.3.0.dist-info}/licenses/LICENSE.txt +0 -0
- {chdb-3.2.0.dist-info → chdb-3.3.0.dist-info}/top_level.txt +0 -0
chdb/__init__.py
CHANGED
|
@@ -19,7 +19,7 @@ _process_result_format_funs = {
|
|
|
19
19
|
# UDF script path will be f"{g_udf_path}/{func_name}.py"
|
|
20
20
|
g_udf_path = ""
|
|
21
21
|
|
|
22
|
-
chdb_version = ('3', '2', '0')
|
|
22
|
+
chdb_version = ('3', '3', '0')
|
|
23
23
|
if sys.version_info[:2] >= (3, 7):
|
|
24
24
|
# get the path of the current file
|
|
25
25
|
current_path = os.path.dirname(os.path.abspath(__file__))
|
chdb/_chdb.cpython-313-darwin.so
CHANGED
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chdb
|
|
3
|
-
Version: 3.2.0
|
|
3
|
+
Version: 3.3.0
|
|
4
4
|
Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
|
|
5
5
|
Home-page: https://github.com/chdb-io/chdb
|
|
6
6
|
Author: auxten
|
|
@@ -54,11 +54,11 @@ Dynamic: requires-python
|
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
> chDB is an in-process SQL OLAP Engine powered by ClickHouse [^1]
|
|
57
|
-
> For more details: [The birth of chDB](https://auxten.com/the-birth-of-chdb/)
|
|
57
|
+
> For more details: [The birth of chDB](https://auxten.com/the-birth-of-chdb/)
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
## Features
|
|
61
|
-
|
|
61
|
+
|
|
62
62
|
* In-process SQL OLAP Engine, powered by ClickHouse
|
|
63
63
|
* No need to install ClickHouse
|
|
64
64
|
* Minimized data copy from C++ to Python with [python memoryview](https://docs.python.org/3/c-api/memoryview.html)
|
|
@@ -149,7 +149,7 @@ res = chdb.query('select version()', 'Pretty'); print(res)
|
|
|
149
149
|
# See more data type format in tests/format_output.py
|
|
150
150
|
res = chdb.query('select * from file("data.parquet", Parquet)', 'JSON'); print(res)
|
|
151
151
|
res = chdb.query('select * from file("data.csv", CSV)', 'CSV'); print(res)
|
|
152
|
-
print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, elapsed {res.elapsed()} seconds")
|
|
152
|
+
print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, storage read {res.storage_rows_read()} rows, {res.storage_bytes_read()} bytes, elapsed {res.elapsed()} seconds")
|
|
153
153
|
```
|
|
154
154
|
|
|
155
155
|
### Pandas dataframe output
|
|
@@ -174,6 +174,8 @@ ret_tbl = cdf.query(sql="select * from __tbl1__ t1 join __tbl2__ t2 on t1.a = t2
|
|
|
174
174
|
print(ret_tbl)
|
|
175
175
|
# Query on the DataFrame Table
|
|
176
176
|
print(ret_tbl.query('select b, sum(a) from __table__ group by b'))
|
|
177
|
+
# Pandas DataFrames are automatically registered as temporary tables in ClickHouse
|
|
178
|
+
chdb.query("SELECT * FROM Python(df1) t1 JOIN Python(df2) t2 ON t1.a = t2.c").show()
|
|
177
179
|
```
|
|
178
180
|
</details>
|
|
179
181
|
|
|
@@ -321,10 +323,19 @@ df = pd.DataFrame(
|
|
|
321
323
|
{
|
|
322
324
|
"a": [1, 2, 3, 4, 5, 6],
|
|
323
325
|
"b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
|
|
326
|
+
"dict_col": [
|
|
327
|
+
{'id': 1, 'tags': ['urgent', 'important'], 'metadata': {'created': '2024-01-01'}},
|
|
328
|
+
{'id': 2, 'tags': ['normal'], 'metadata': {'created': '2024-02-01'}},
|
|
329
|
+
{'id': 3, 'name': 'tom'},
|
|
330
|
+
{'id': 4, 'value': '100'},
|
|
331
|
+
{'id': 5, 'value': 101},
|
|
332
|
+
{'id': 6, 'value': 102},
|
|
333
|
+
],
|
|
324
334
|
}
|
|
325
335
|
)
|
|
326
336
|
|
|
327
337
|
chdb.query("SELECT b, sum(a) FROM Python(df) GROUP BY b ORDER BY b").show()
|
|
338
|
+
chdb.query("SELECT dict_col.id FROM Python(df) WHERE dict_col.value='100'").show()
|
|
328
339
|
```
|
|
329
340
|
|
|
330
341
|
### Query on Arrow Table
|
|
@@ -336,12 +347,19 @@ arrow_table = pa.table(
|
|
|
336
347
|
{
|
|
337
348
|
"a": [1, 2, 3, 4, 5, 6],
|
|
338
349
|
"b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
|
|
350
|
+
"dict_col": [
|
|
351
|
+
{'id': 1, 'value': 'tom'},
|
|
352
|
+
{'id': 2, 'value': 'jerry'},
|
|
353
|
+
{'id': 3, 'value': 'auxten'},
|
|
354
|
+
{'id': 4, 'value': 'tom'},
|
|
355
|
+
{'id': 5, 'value': 'jerry'},
|
|
356
|
+
{'id': 6, 'value': 'auxten'},
|
|
357
|
+
],
|
|
339
358
|
}
|
|
340
359
|
)
|
|
341
360
|
|
|
342
|
-
chdb.query(
|
|
343
|
-
|
|
344
|
-
).show()
|
|
361
|
+
chdb.query("SELECT b, sum(a) FROM Python(arrow_table) GROUP BY b ORDER BY b").show()
|
|
362
|
+
chdb.query("SELECT dict_col.id FROM Python(arrow_table) WHERE dict_col.value='tom'").show()
|
|
345
363
|
```
|
|
346
364
|
|
|
347
365
|
### Query on chdb.PyReader class instance
|
|
@@ -365,24 +383,79 @@ class myReader(chdb.PyReader):
|
|
|
365
383
|
def read(self, col_names, count):
|
|
366
384
|
print("Python func read", col_names, count, self.cursor)
|
|
367
385
|
if self.cursor >= len(self.data["a"]):
|
|
386
|
+
self.cursor = 0
|
|
368
387
|
return []
|
|
369
388
|
block = [self.data[col] for col in col_names]
|
|
370
389
|
self.cursor += len(block[0])
|
|
371
390
|
return block
|
|
372
391
|
|
|
392
|
+
def get_schema(self):
|
|
393
|
+
return [
|
|
394
|
+
("a", "int"),
|
|
395
|
+
("b", "str"),
|
|
396
|
+
("dict_col", "json")
|
|
397
|
+
]
|
|
398
|
+
|
|
373
399
|
reader = myReader(
|
|
374
400
|
{
|
|
375
401
|
"a": [1, 2, 3, 4, 5, 6],
|
|
376
402
|
"b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
|
|
403
|
+
"dict_col": [
|
|
404
|
+
{'id': 1, 'tags': ['urgent', 'important'], 'metadata': {'created': '2024-01-01'}},
|
|
405
|
+
{'id': 2, 'tags': ['normal'], 'metadata': {'created': '2024-02-01'}},
|
|
406
|
+
{'id': 3, 'name': 'tom'},
|
|
407
|
+
{'id': 4, 'value': '100'},
|
|
408
|
+
{'id': 5, 'value': 101},
|
|
409
|
+
{'id': 6, 'value': 102}
|
|
410
|
+
],
|
|
377
411
|
}
|
|
378
412
|
)
|
|
379
413
|
|
|
380
|
-
chdb.query(
|
|
381
|
-
|
|
382
|
-
).show()
|
|
414
|
+
chdb.query("SELECT b, sum(a) FROM Python(reader) GROUP BY b ORDER BY b").show()
|
|
415
|
+
chdb.query("SELECT dict_col.id FROM Python(reader) WHERE dict_col.value='100'").show()
|
|
383
416
|
```
|
|
384
417
|
|
|
385
|
-
see also: [test_query_py.py](tests/test_query_py.py).
|
|
418
|
+
see also: [test_query_py.py](tests/test_query_py.py) and [test_query_json.py](tests/test_query_json.py).
|
|
419
|
+
|
|
420
|
+
### JSON Type Inference
|
|
421
|
+
|
|
422
|
+
chDB automatically converts Python dictionary objects to ClickHouse JSON types from these sources:
|
|
423
|
+
|
|
424
|
+
1. **Pandas DataFrame**
|
|
425
|
+
- Columns with `object` dtype are sampled (default 10,000 rows) to detect JSON structures.
|
|
426
|
+
- Control sampling via SQL settings:
|
|
427
|
+
```sql
|
|
428
|
+
SET pandas_analyze_sample = 10000 -- Default sampling
|
|
429
|
+
SET pandas_analyze_sample = 0 -- Force String type
|
|
430
|
+
SET pandas_analyze_sample = -1 -- Force JSON type
|
|
431
|
+
```
|
|
432
|
+
- Columns are converted to `String` if sampling finds non-dictionary values.
|
|
433
|
+
|
|
434
|
+
2. **Arrow Table**
|
|
435
|
+
- `struct` type columns are automatically mapped to JSON columns.
|
|
436
|
+
- Nested structures preserve type information.
|
|
437
|
+
|
|
438
|
+
3. **chdb.PyReader**
|
|
439
|
+
- Implement custom schema mapping in `get_schema()`:
|
|
440
|
+
```python
|
|
441
|
+
def get_schema(self):
|
|
442
|
+
return [
|
|
443
|
+
("c1", "JSON"), # Explicit JSON mapping
|
|
444
|
+
("c2", "String")
|
|
445
|
+
]
|
|
446
|
+
```
|
|
447
|
+
- Column types declared as "JSON" will bypass auto-detection.
|
|
448
|
+
|
|
449
|
+
When converting Python dictionary objects to JSON columns:
|
|
450
|
+
|
|
451
|
+
1. **Nested Structures**
|
|
452
|
+
- Recursively process nested dictionaries, lists, tuples and NumPy arrays.
|
|
453
|
+
|
|
454
|
+
2. **Primitive Types**
|
|
455
|
+
- Automatic type recognition for basic types such as integers, floats, strings, booleans, and more.
|
|
456
|
+
|
|
457
|
+
3. **Complex Objects**
|
|
458
|
+
- Non-primitive types will be converted to strings.
|
|
386
459
|
|
|
387
460
|
### Limitations
|
|
388
461
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
chdb/__init__.py,sha256=
|
|
1
|
+
chdb/__init__.py,sha256=KsqKKRN2T2Rspn94XwwtR45fT5viF5h6KUJ7JIETo1w,3762
|
|
2
2
|
chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
|
|
3
|
-
chdb/_chdb.cpython-313-darwin.so,sha256=
|
|
3
|
+
chdb/_chdb.cpython-313-darwin.so,sha256=_-m6qaMF_RqSV7ln2D7SnnmsiWQA9uC2_zfZcLhchok,422327392
|
|
4
4
|
chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
|
|
5
5
|
chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
|
|
6
6
|
chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
|
|
@@ -21,8 +21,8 @@ chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
|
|
|
21
21
|
chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
|
|
22
22
|
chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
|
|
23
23
|
chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
|
|
24
|
-
chdb-3.
|
|
25
|
-
chdb-3.
|
|
26
|
-
chdb-3.
|
|
27
|
-
chdb-3.
|
|
28
|
-
chdb-3.
|
|
24
|
+
chdb-3.3.0.dist-info/licenses/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
|
|
25
|
+
chdb-3.3.0.dist-info/METADATA,sha256=fRAJ-SRLZW1U7SpJeiGr2S7rKJyesMCGoQPpcPc6U1s,24690
|
|
26
|
+
chdb-3.3.0.dist-info/WHEEL,sha256=pSzfeXdDXK1kuDkODmGSIGRmpWvZp2aIIR6ouoIacKw,111
|
|
27
|
+
chdb-3.3.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
|
|
28
|
+
chdb-3.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|