chdb 3.2.0__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 3.4.0__cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/__init__.py CHANGED
@@ -19,7 +19,7 @@ _process_result_format_funs = {
19
19
  # UDF script path will be f"{g_udf_path}/{func_name}.py"
20
20
  g_udf_path = ""
21
21
 
22
- chdb_version = ('3', '2', '0')
22
+ chdb_version = ('3', '4', '0')
23
23
  if sys.version_info[:2] >= (3, 7):
24
24
  # get the path of the current file
25
25
  current_path = os.path.dirname(os.path.abspath(__file__))
Binary file
chdb/session/state.py CHANGED
@@ -1,5 +1,3 @@
1
- import tempfile
2
- import shutil
3
1
  import warnings
4
2
 
5
3
  import chdb
@@ -51,11 +49,9 @@ class Session:
51
49
  )
52
50
  g_session.close()
53
51
  g_session_path = None
54
- if path is None or ":memory:" in path:
55
- self._cleanup = True
56
- self._path = tempfile.mkdtemp()
52
+ if path is None:
53
+ self._path = ":memory:"
57
54
  else:
58
- self._cleanup = False
59
55
  self._path = path
60
56
  if chdb.g_udf_path != "":
61
57
  self._udf_path = chdb.g_udf_path
@@ -84,8 +80,6 @@ class Session:
84
80
  self.close()
85
81
 
86
82
  def close(self):
87
- if self._cleanup:
88
- self.cleanup()
89
83
  if self._conn is not None:
90
84
  self._conn.close()
91
85
  self._conn = None
@@ -95,13 +89,7 @@ class Session:
95
89
 
96
90
  def cleanup(self):
97
91
  try:
98
- if self._conn is not None:
99
- self._conn.close()
100
- self._conn = None
101
- shutil.rmtree(self._path)
102
- global g_session, g_session_path
103
- g_session = None
104
- g_session_path = None
92
+ self.close()
105
93
  except: # noqa
106
94
  pass
107
95
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: chdb
3
- Version: 3.2.0
3
+ Version: 3.4.0
4
4
  Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
5
5
  Home-page: https://github.com/chdb-io/chdb
6
6
  Author: auxten
@@ -51,11 +51,11 @@ Requires-Dist: pandas >=2.0.0
51
51
 
52
52
 
53
53
  > chDB is an in-process SQL OLAP Engine powered by ClickHouse [^1]
54
- > For more details: [The birth of chDB](https://auxten.com/the-birth-of-chdb/)
54
+ > For more details: [The birth of chDB](https://auxten.com/the-birth-of-chdb/)
55
55
 
56
56
 
57
57
  ## Features
58
-
58
+
59
59
  * In-process SQL OLAP Engine, powered by ClickHouse
60
60
  * No need to install ClickHouse
61
61
  * Minimized data copy from C++ to Python with [python memoryview](https://docs.python.org/3/c-api/memoryview.html)
@@ -146,7 +146,7 @@ res = chdb.query('select version()', 'Pretty'); print(res)
146
146
  # See more data type format in tests/format_output.py
147
147
  res = chdb.query('select * from file("data.parquet", Parquet)', 'JSON'); print(res)
148
148
  res = chdb.query('select * from file("data.csv", CSV)', 'CSV'); print(res)
149
- print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, elapsed {res.elapsed()} seconds")
149
+ print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, storage read {res.storage_rows_read()} rows, {res.storage_bytes_read()} bytes, elapsed {res.elapsed()} seconds")
150
150
  ```
151
151
 
152
152
  ### Pandas dataframe output
@@ -171,6 +171,8 @@ ret_tbl = cdf.query(sql="select * from __tbl1__ t1 join __tbl2__ t2 on t1.a = t2
171
171
  print(ret_tbl)
172
172
  # Query on the DataFrame Table
173
173
  print(ret_tbl.query('select b, sum(a) from __table__ group by b'))
174
+ # Pandas DataFrames are automatically registered as temporary tables in ClickHouse
175
+ chdb.query("SELECT * FROM Python(df1) t1 JOIN Python(df2) t2 ON t1.a = t2.c").show()
174
176
  ```
175
177
  </details>
176
178
 
@@ -318,10 +320,19 @@ df = pd.DataFrame(
318
320
  {
319
321
  "a": [1, 2, 3, 4, 5, 6],
320
322
  "b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
323
+ "dict_col": [
324
+ {'id': 1, 'tags': ['urgent', 'important'], 'metadata': {'created': '2024-01-01'}},
325
+ {'id': 2, 'tags': ['normal'], 'metadata': {'created': '2024-02-01'}},
326
+ {'id': 3, 'name': 'tom'},
327
+ {'id': 4, 'value': '100'},
328
+ {'id': 5, 'value': 101},
329
+ {'id': 6, 'value': 102},
330
+ ],
321
331
  }
322
332
  )
323
333
 
324
334
  chdb.query("SELECT b, sum(a) FROM Python(df) GROUP BY b ORDER BY b").show()
335
+ chdb.query("SELECT dict_col.id FROM Python(df) WHERE dict_col.value='100'").show()
325
336
  ```
326
337
 
327
338
  ### Query on Arrow Table
@@ -333,12 +344,19 @@ arrow_table = pa.table(
333
344
  {
334
345
  "a": [1, 2, 3, 4, 5, 6],
335
346
  "b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
347
+ "dict_col": [
348
+ {'id': 1, 'value': 'tom'},
349
+ {'id': 2, 'value': 'jerry'},
350
+ {'id': 3, 'value': 'auxten'},
351
+ {'id': 4, 'value': 'tom'},
352
+ {'id': 5, 'value': 'jerry'},
353
+ {'id': 6, 'value': 'auxten'},
354
+ ],
336
355
  }
337
356
  )
338
357
 
339
- chdb.query(
340
- "SELECT b, sum(a) FROM Python(arrow_table) GROUP BY b ORDER BY b", "debug"
341
- ).show()
358
+ chdb.query("SELECT b, sum(a) FROM Python(arrow_table) GROUP BY b ORDER BY b").show()
359
+ chdb.query("SELECT dict_col.id FROM Python(arrow_table) WHERE dict_col.value='tom'").show()
342
360
  ```
343
361
 
344
362
  ### Query on chdb.PyReader class instance
@@ -362,24 +380,79 @@ class myReader(chdb.PyReader):
362
380
  def read(self, col_names, count):
363
381
  print("Python func read", col_names, count, self.cursor)
364
382
  if self.cursor >= len(self.data["a"]):
383
+ self.cursor = 0
365
384
  return []
366
385
  block = [self.data[col] for col in col_names]
367
386
  self.cursor += len(block[0])
368
387
  return block
369
388
 
389
+ def get_schema(self):
390
+ return [
391
+ ("a", "int"),
392
+ ("b", "str"),
393
+ ("dict_col", "json")
394
+ ]
395
+
370
396
  reader = myReader(
371
397
  {
372
398
  "a": [1, 2, 3, 4, 5, 6],
373
399
  "b": ["tom", "jerry", "auxten", "tom", "jerry", "auxten"],
400
+ "dict_col": [
401
+ {'id': 1, 'tags': ['urgent', 'important'], 'metadata': {'created': '2024-01-01'}},
402
+ {'id': 2, 'tags': ['normal'], 'metadata': {'created': '2024-02-01'}},
403
+ {'id': 3, 'name': 'tom'},
404
+ {'id': 4, 'value': '100'},
405
+ {'id': 5, 'value': 101},
406
+ {'id': 6, 'value': 102}
407
+ ],
374
408
  }
375
409
  )
376
410
 
377
- chdb.query(
378
- "SELECT b, sum(a) FROM Python(reader) GROUP BY b ORDER BY b"
379
- ).show()
411
+ chdb.query("SELECT b, sum(a) FROM Python(reader) GROUP BY b ORDER BY b").show()
412
+ chdb.query("SELECT dict_col.id FROM Python(reader) WHERE dict_col.value='100'").show()
380
413
  ```
381
414
 
382
- see also: [test_query_py.py](tests/test_query_py.py).
415
+ see also: [test_query_py.py](tests/test_query_py.py) and [test_query_json.py](tests/test_query_json.py).
416
+
417
+ ### JSON Type Inference
418
+
419
+ chDB automatically converts Python dictionary objects to ClickHouse JSON types from these sources:
420
+
421
+ 1. **Pandas DataFrame**
422
+ - Columns with `object` dtype are sampled (default 10,000 rows) to detect JSON structures.
423
+ - Control sampling via SQL settings:
424
+ ```sql
425
+ SET pandas_analyze_sample = 10000 -- Default sampling
426
+ SET pandas_analyze_sample = 0 -- Force String type
427
+ SET pandas_analyze_sample = -1 -- Force JSON type
428
+ ```
429
+ - Columns are converted to `String` if sampling finds non-dictionary values.
430
+
431
+ 2. **Arrow Table**
432
+ - `struct` type columns are automatically mapped to JSON columns.
433
+ - Nested structures preserve type information.
434
+
435
+ 3. **chdb.PyReader**
436
+ - Implement custom schema mapping in `get_schema()`:
437
+ ```python
438
+ def get_schema(self):
439
+ return [
440
+ ("c1", "JSON"), # Explicit JSON mapping
441
+ ("c2", "String")
442
+ ]
443
+ ```
444
+ - Column types declared as "JSON" will bypass auto-detection.
445
+
446
+ When converting Python dictionary objects to JSON columns:
447
+
448
+ 1. **Nested Structures**
449
+ - Nested dictionaries, lists, tuples, and NumPy arrays are processed recursively.
450
+
451
+ 2. **Primitive Types**
452
+ - Automatic type recognition for basic types such as integers, floats, strings, booleans, and more.
453
+
454
+ 3. **Complex Objects**
455
+ - Non-primitive types will be converted to strings.
383
456
 
384
457
  ### Limitations
385
458
 
@@ -1,9 +1,9 @@
1
1
  chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
2
- chdb/__init__.py,sha256=yuWj0i3_5-uBRZCyZMBKIiBR1MmjEyAjcuxKTm076jI,3762
3
- chdb/_chdb.cpython-38-aarch64-linux-gnu.so,sha256=p6q6ZujWMVJL1w2bFECrDzmf69T2tDXH7BYbjmtGEQQ,533414320
2
+ chdb/__init__.py,sha256=pNYyLRqm2s5hG1rPhVLECZXvJQ60EGPt_OC88W90j0w,3762
3
+ chdb/_chdb.cpython-38-aarch64-linux-gnu.so,sha256=jWd7xJeE74xK21ep7dDeYMtYrEW-VVzxYN4HgQf8PiY,568828616
4
4
  chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
5
5
  chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
6
- chdb/session/state.py,sha256=UtObxVuyNgeqFkTXVHtmOknR90Pe1dEzbOpKFDBYOkg,4845
6
+ chdb/session/state.py,sha256=m7K9zZtoMQTlh-pfmSyJV38pAe6eHNTPtOvlHYrImhA,4436
7
7
  chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
8
8
  chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
9
9
  chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
@@ -21,8 +21,8 @@ chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSA
21
21
  chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
23
23
  chdb/state/sqlitelike.py,sha256=v0xh9jWirHzhDVq26C2213LxfaDbRulSAhSHaTiZ24c,12283
24
- chdb-3.2.0.dist-info/RECORD,,
25
- chdb-3.2.0.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
26
- chdb-3.2.0.dist-info/WHEEL,sha256=kTTaziUGh4IgOXqp-kkEc0J_ej7kOw0CdWocYU-1ZR0,149
27
- chdb-3.2.0.dist-info/METADATA,sha256=SPZ4Gn_IrMBxh3-zVDXQpQejpUV33SdEAVdqGarSLJ8,21517
28
- chdb-3.2.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
24
+ chdb-3.4.0.dist-info/RECORD,,
25
+ chdb-3.4.0.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
26
+ chdb-3.4.0.dist-info/WHEEL,sha256=kTTaziUGh4IgOXqp-kkEc0J_ej7kOw0CdWocYU-1ZR0,149
27
+ chdb-3.4.0.dist-info/METADATA,sha256=niEMQj5RD1T34yBzfkJzW9K4gziEvAnITAuzNN10AIs,24622
28
+ chdb-3.4.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
File without changes