chdb 3.2.0__cp310-cp310-macosx_10_15_x86_64.whl → 3.4.0__cp310-cp310-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/__init__.py CHANGED
@@ -19,7 +19,7 @@ _process_result_format_funs = {
19
19
  # UDF script path will be f"{g_udf_path}/{func_name}.py"
20
20
  g_udf_path = ""
21
21
 
22
- chdb_version = ('3', '2', '0')
22
+ chdb_version = ('3', '4', '0')
23
23
  if sys.version_info[:2] >= (3, 7):
24
24
  # get the path of the current file
25
25
  current_path = os.path.dirname(os.path.abspath(__file__))
chdb/_chdb.cpython-310-darwin.so CHANGED
Binary file
chdb/session/state.py CHANGED
@@ -1,5 +1,3 @@
1
- import tempfile
2
- import shutil
3
1
  import warnings
4
2
 
5
3
  import chdb
@@ -51,11 +49,9 @@ class Session:
51
49
  )
52
50
  g_session.close()
53
51
  g_session_path = None
54
- if path is None or ":memory:" in path:
55
- self._cleanup = True
56
- self._path = tempfile.mkdtemp()
52
+ if path is None:
53
+ self._path = ":memory:"
57
54
  else:
58
- self._cleanup = False
59
55
  self._path = path
60
56
  if chdb.g_udf_path != "":
61
57
  self._udf_path = chdb.g_udf_path
@@ -84,8 +80,6 @@ class Session:
84
80
  self.close()
85
81
 
86
82
  def close(self):
87
- if self._cleanup:
88
- self.cleanup()
89
83
  if self._conn is not None:
90
84
  self._conn.close()
91
85
  self._conn = None
@@ -95,13 +89,7 @@ class Session:
95
89
 
96
90
  def cleanup(self):
97
91
  try:
98
- if self._conn is not None:
99
- self._conn.close()
100
- self._conn = None
101
- shutil.rmtree(self._path)
102
- global g_session, g_session_path
103
- g_session = None
104
- g_session_path = None
92
+ self.close()
105
93
  except: # noqa
106
94
  pass
107
95
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chdb
3
- Version: 3.2.0
3
+ Version: 3.4.0
4
4
  Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
5
5
  Home-page: https://github.com/chdb-io/chdb
6
6
  Author: auxten
@@ -54,11 +54,11 @@ Dynamic: requires-python
54
54
 
55
55
 
56
56
  > chDB is an in-process SQL OLAP Engine powered by ClickHouse [^1]
57
- > For more details: [The birth of chDB](https://auxten.com/the-birth-of-chdb/)
57
+ > For more details: [The birth of chDB](https://auxten.com/the-birth-of-chdb/)
58
58
 
59
59
 
60
60
  ## Features
61
-
61
+
62
62
  * In-process SQL OLAP Engine, powered by ClickHouse
63
63
  * No need to install ClickHouse
64
64
  * Minimized data copy from C++ to Python with [python memoryview](https://docs.python.org/3/c-api/memoryview.html)
@@ -149,7 +149,7 @@ res = chdb.query('select version()', 'Pretty'); print(res)
149
149
  # See more data type format in tests/format_output.py
150
150
  res = chdb.query('select * from file("data.parquet", Parquet)', 'JSON'); print(res)
151
151
  res = chdb.query('select * from file("data.csv", CSV)', 'CSV'); print(res)
152
- print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, elapsed {res.elapsed()} seconds")
152
+ print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, storage read {res.storage_rows_read()} rows, {res.storage_bytes_read()} bytes, elapsed {res.elapsed()} seconds")
153
153
  ```
154
154
 
155
155
  ### Pandas dataframe output
@@ -174,6 +174,8 @@ ret_tbl = cdf.query(sql="select * from __tbl1__ t1 join __tbl2__ t2 on t1.a = t2
174
174
  print(ret_tbl)
175
175
  # Query on the DataFrame Table
176
176
  print(ret_tbl.query('select b, sum(a) from __table__ group by b'))
177
+ # Pandas DataFrames are automatically registered as temporary tables in ClickHouse
178
+ chdb.query("SELECT * FROM Python(df1) t1 JOIN Python(df2) t2 ON t1.a = t2.c").show()
177
179
  ```
178
180
  </details>
179
181
 
@@ -321,10 +323,19 @@ df = pd.DataFrame(
321
323
  {
322
324
  "a": [1, 2, 3, 4, 5, 6],
323
325
  "b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
326
+ "dict_col": [
327
+ {'id': 1, 'tags': ['urgent', 'important'], 'metadata': {'created': '2024-01-01'}},
328
+ {'id': 2, 'tags': ['normal'], 'metadata': {'created': '2024-02-01'}},
329
+ {'id': 3, 'name': 'tom'},
330
+ {'id': 4, 'value': '100'},
331
+ {'id': 5, 'value': 101},
332
+ {'id': 6, 'value': 102},
333
+ ],
324
334
  }
325
335
  )
326
336
 
327
337
  chdb.query("SELECT b, sum(a) FROM Python(df) GROUP BY b ORDER BY b").show()
338
+ chdb.query("SELECT dict_col.id FROM Python(df) WHERE dict_col.value='100'").show()
328
339
  ```
329
340
 
330
341
  ### Query on Arrow Table
@@ -336,12 +347,19 @@ arrow_table = pa.table(
336
347
  {
337
348
  "a": [1, 2, 3, 4, 5, 6],
338
349
  "b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
350
+ "dict_col": [
351
+ {'id': 1, 'value': 'tom'},
352
+ {'id': 2, 'value': 'jerry'},
353
+ {'id': 3, 'value': 'auxten'},
354
+ {'id': 4, 'value': 'tom'},
355
+ {'id': 5, 'value': 'jerry'},
356
+ {'id': 6, 'value': 'auxten'},
357
+ ],
339
358
  }
340
359
  )
341
360
 
342
- chdb.query(
343
- "SELECT b, sum(a) FROM Python(arrow_table) GROUP BY b ORDER BY b", "debug"
344
- ).show()
361
+ chdb.query("SELECT b, sum(a) FROM Python(arrow_table) GROUP BY b ORDER BY b").show()
362
+ chdb.query("SELECT dict_col.id FROM Python(arrow_table) WHERE dict_col.value='tom'").show()
345
363
  ```
346
364
 
347
365
  ### Query on chdb.PyReader class instance
@@ -365,24 +383,79 @@ class myReader(chdb.PyReader):
365
383
  def read(self, col_names, count):
366
384
  print("Python func read", col_names, count, self.cursor)
367
385
  if self.cursor >= len(self.data["a"]):
386
+ self.cursor = 0
368
387
  return []
369
388
  block = [self.data[col] for col in col_names]
370
389
  self.cursor += len(block[0])
371
390
  return block
372
391
 
392
+ def get_schema(self):
393
+ return [
394
+ ("a", "int"),
395
+ ("b", "str"),
396
+ ("dict_col", "json")
397
+ ]
398
+
373
399
  reader = myReader(
374
400
  {
375
401
  "a": [1, 2, 3, 4, 5, 6],
376
402
  "b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
403
+ "dict_col": [
404
+ {'id': 1, 'tags': ['urgent', 'important'], 'metadata': {'created': '2024-01-01'}},
405
+ {'id': 2, 'tags': ['normal'], 'metadata': {'created': '2024-02-01'}},
406
+ {'id': 3, 'name': 'tom'},
407
+ {'id': 4, 'value': '100'},
408
+ {'id': 5, 'value': 101},
409
+ {'id': 6, 'value': 102}
410
+ ],
377
411
  }
378
412
  )
379
413
 
380
- chdb.query(
381
- "SELECT b, sum(a) FROM Python(reader) GROUP BY b ORDER BY b"
382
- ).show()
414
+ chdb.query("SELECT b, sum(a) FROM Python(reader) GROUP BY b ORDER BY b").show()
415
+ chdb.query("SELECT dict_col.id FROM Python(reader) WHERE dict_col.value='100'").show()
383
416
  ```
384
417
 
385
- see also: [test_query_py.py](tests/test_query_py.py).
418
+ see also: [test_query_py.py](tests/test_query_py.py) and [test_query_json.py](tests/test_query_json.py).
419
+
420
+ ### JSON Type Inference
421
+
422
+ chDB automatically converts Python dictionary objects from the following sources to the ClickHouse JSON type:
423
+
424
+ 1. **Pandas DataFrame**
425
+ - Columns with `object` dtype are sampled (default 10,000 rows) to detect JSON structures.
426
+ - Control sampling via SQL settings:
427
+ ```sql
428
+ SET pandas_analyze_sample = 10000 -- Default sampling
429
+ SET pandas_analyze_sample = 0 -- Force String type
430
+ SET pandas_analyze_sample = -1 -- Force JSON type
431
+ ```
432
+ - Columns are converted to `String` if sampling finds non-dictionary values.
433
+
434
+ 2. **Arrow Table**
435
+ - `struct` type columns are automatically mapped to JSON columns.
436
+ - Nested structures preserve type information.
437
+
438
+ 3. **chdb.PyReader**
439
+ - Implement custom schema mapping in `get_schema()`:
440
+ ```python
441
+ def get_schema(self):
442
+ return [
443
+ ("c1", "JSON"), # Explicit JSON mapping
444
+ ("c2", "String")
445
+ ]
446
+ ```
447
+ - Column types declared as "JSON" will bypass auto-detection.
448
+
449
+ When converting Python dictionary objects to JSON columns:
450
+
451
+ 1. **Nested Structures**
452
+ - Recursively process nested dictionaries, lists, tuples and NumPy arrays.
453
+
454
+ 2. **Primitive Types**
455
+ - Automatic type recognition for basic types such as integers, floats, strings, booleans, and more.
456
+
457
+ 3. **Complex Objects**
458
+ - Non-primitive types will be converted to strings.
386
459
 
387
460
  ### Limitations
388
461
 
@@ -1,6 +1,6 @@
1
- chdb/__init__.py,sha256=yuWj0i3_5-uBRZCyZMBKIiBR1MmjEyAjcuxKTm076jI,3762
1
+ chdb/__init__.py,sha256=pNYyLRqm2s5hG1rPhVLECZXvJQ60EGPt_OC88W90j0w,3762
2
2
  chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
3
- chdb/_chdb.cpython-310-darwin.so,sha256=Hg35HUPx4pIeVRb_MTSDBMvAVE96tgYDesbjwNliHd0,422118800
3
+ chdb/_chdb.cpython-310-darwin.so,sha256=-ivmoDcGPeidCgr3bn-O2O1yyVahnBHemraA9nYjmbE,411160252
4
4
  chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
5
5
  chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
6
6
  chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
@@ -13,7 +13,7 @@ chdb/dbapi/times.py,sha256=_qXgDaYwsHntvpIKSKXp1rrYIgtq6Z9pLyLnO2XNoL0,360
13
13
  chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSAb60z8,370
14
14
  chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
16
- chdb/session/state.py,sha256=UtObxVuyNgeqFkTXVHtmOknR90Pe1dEzbOpKFDBYOkg,4845
16
+ chdb/session/state.py,sha256=m7K9zZtoMQTlh-pfmSyJV38pAe6eHNTPtOvlHYrImhA,4436
17
17
  chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
18
18
  chdb/state/sqlitelike.py,sha256=v0xh9jWirHzhDVq26C2213LxfaDbRulSAhSHaTiZ24c,12283
19
19
  chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
@@ -21,8 +21,8 @@ chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
21
21
  chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
22
22
  chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
23
23
  chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
24
- chdb-3.2.0.dist-info/licenses/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
25
- chdb-3.2.0.dist-info/METADATA,sha256=n8no2boSiPOzbj8PtIT3nIQUtYGXQ1TEgq4QSOFzI4U,21585
26
- chdb-3.2.0.dist-info/WHEEL,sha256=M2FOsHH_BuimYw_ru4EeyFEv1eIcU5dgHIB25KOEKEE,111
27
- chdb-3.2.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
28
- chdb-3.2.0.dist-info/RECORD,,
24
+ chdb-3.4.0.dist-info/licenses/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
25
+ chdb-3.4.0.dist-info/METADATA,sha256=pwAsyrEzxFDBifcZoNLzRQHB7bCXd2B8y-31HX3APPE,24690
26
+ chdb-3.4.0.dist-info/WHEEL,sha256=jB_TrBsFLSA69IrIzS7unbPPVKNDT18SGGKAwl3MR44,111
27
+ chdb-3.4.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
28
+ chdb-3.4.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp310-cp310-macosx_10_15_x86_64
5
5