chdb 2.2.0b1__cp310-cp310-macosx_10_15_x86_64.whl → 3.0.1__cp310-cp310-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import sys
2
2
  import os
3
+ import threading
3
4
 
4
5
 
5
6
  class ChdbError(Exception):
@@ -18,7 +19,7 @@ _process_result_format_funs = {
18
19
  # UDF script path will be f"{g_udf_path}/{func_name}.py"
19
20
  g_udf_path = ""
20
21
 
21
- chdb_version = ('2', '2', '0b1')
22
+ chdb_version = ('3', '0', '1')
22
23
  if sys.version_info[:2] >= (3, 7):
23
24
  # get the path of the current file
24
25
  current_path = os.path.dirname(os.path.abspath(__file__))
@@ -29,7 +30,9 @@ if sys.version_info[:2] >= (3, 7):
29
30
  from . import _chdb # noqa
30
31
 
31
32
  os.chdir(cwd)
32
- engine_version = str(_chdb.query("SELECT version();", "CSV").bytes())[3:-4]
33
+ conn = _chdb.connect()
34
+ engine_version = str(conn.query("SELECT version();", "CSV").bytes())[3:-4]
35
+ conn.close()
33
36
  else:
34
37
  raise NotImplementedError("Python 3.6 or lower version is not supported")
35
38
 
@@ -64,18 +67,44 @@ def to_df(r):
64
67
  return t.to_pandas(use_threads=True)
65
68
 
66
69
 
70
+ # global connection lock, for multi-threading use of legacy chdb.query()
71
+ g_conn_lock = threading.Lock()
72
+
73
+
67
74
  # wrap _chdb functions
68
75
  def query(sql, output_format="CSV", path="", udf_path=""):
69
76
  global g_udf_path
70
77
  if udf_path != "":
71
78
  g_udf_path = udf_path
79
+ conn_str = ""
80
+ if path == "":
81
+ conn_str = ":memory:"
82
+ else:
83
+ conn_str = f"{path}"
84
+ if g_udf_path != "":
85
+ if "?" in conn_str:
86
+ conn_str = f"{conn_str}&udf_path={g_udf_path}"
87
+ else:
88
+ conn_str = f"{conn_str}?udf_path={g_udf_path}"
89
+ if output_format == "Debug":
90
+ output_format = "CSV"
91
+ if "?" in conn_str:
92
+ conn_str = f"{conn_str}&verbose&log-level=test"
93
+ else:
94
+ conn_str = f"{conn_str}?verbose&log-level=test"
95
+
72
96
  lower_output_format = output_format.lower()
73
97
  result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
74
98
  if lower_output_format in _arrow_format:
75
99
  output_format = "Arrow"
76
- res = _chdb.query(sql, output_format, path=path, udf_path=g_udf_path)
77
- if res.has_error():
78
- raise ChdbError(res.error_message())
100
+
101
+ with g_conn_lock:
102
+ conn = _chdb.connect(conn_str)
103
+ res = conn.query(sql, output_format)
104
+ if res.has_error():
105
+ conn.close()
106
+ raise ChdbError(res.error_message())
107
+ conn.close()
79
108
  return result_func(res)
80
109
 
81
110
 
Binary file
chdb/dbapi/converters.py CHANGED
@@ -59,6 +59,7 @@ def escape_float(value, mapping=None):
59
59
 
60
60
  _escape_table = [chr(x) for x in range(128)]
61
61
  _escape_table[ord("'")] = u"''"
62
+ _escape_table[ord("\\")] = "\\\\"
62
63
 
63
64
 
64
65
  def _escape_unicode(value, mapping=None):
chdb/session/state.py CHANGED
@@ -1,42 +1,107 @@
1
1
  import tempfile
2
2
  import shutil
3
+ import warnings
3
4
 
4
- from chdb import query
5
+ import chdb
6
+ from ..state import sqlitelike as chdb_stateful
7
+
8
+
9
+ g_session = None
10
+ g_session_path = None
5
11
 
6
12
 
7
13
  class Session:
8
14
  """
9
- Session will keep the state of query. All DDL and DML state will be kept in a dir.
10
- Dir path could be passed in as an argument. If not, a temporary dir will be created.
15
+ Session will keep the state of query.
16
+ If path is None, it will create a temporary directory and use it as the database path
17
+ and the temporary directory will be removed when the session is closed.
18
+ You can also pass in a path to create a database at that path where will keep your data.
19
+
20
+ You can also use a connection string to pass in the path and other parameters.
21
+ Examples:
22
+ - ":memory:" (for in-memory database)
23
+ - "test.db" (for relative path)
24
+ - "file:test.db" (same as above)
25
+ - "/path/to/test.db" (for absolute path)
26
+ - "file:/path/to/test.db" (same as above)
27
+ - "file:test.db?param1=value1&param2=value2" (for relative path with query params)
28
+ - "file::memory:?verbose&log-level=test" (for in-memory database with query params)
29
+ - "///path/to/test.db?param1=value1&param2=value2" (for absolute path)
11
30
 
12
- If path is not specified, the temporary dir will be deleted when the Session object is deleted.
13
- Otherwise path will be kept.
31
+ Connection string args handling:
32
+ Connection string can contain query params like "file:test.db?param1=value1&param2=value2"
33
+ "param1=value1" will be passed to ClickHouse engine as start up args.
14
34
 
15
- Note: The default database is "_local" and the default engine is "Memory" which means all data
16
- will be stored in memory. If you want to store data in disk, you should create another database.
35
+ For more details, see `clickhouse local --help --verbose`
36
+ Some special args handling:
37
+ - "mode=ro" would be "--readonly=1" for clickhouse (read-only mode)
38
+
39
+ Important:
40
+ - There can be only one session at a time. If you want to create a new session, you need to close the existing one.
41
+ - Creating a new session will close the existing one.
17
42
  """
18
43
 
19
44
  def __init__(self, path=None):
20
- if path is None:
45
+ global g_session, g_session_path
46
+ if g_session is not None:
47
+ warnings.warn(
48
+ "There is already an active session. Creating a new session will close the existing one. "
49
+ "It is recommended to close the existing session before creating a new one. "
50
+ f"Closing the existing session {g_session_path}"
51
+ )
52
+ g_session.close()
53
+ g_session_path = None
54
+ if path is None or ":memory:" in path:
21
55
  self._cleanup = True
22
56
  self._path = tempfile.mkdtemp()
23
57
  else:
24
58
  self._cleanup = False
25
59
  self._path = path
60
+ if chdb.g_udf_path != "":
61
+ self._udf_path = chdb.g_udf_path
62
+ # add udf_path to conn_str here.
63
+ # - the `user_scripts_path` will be the value of `udf_path`
64
+ # - the `user_defined_executable_functions_config` will be `user_scripts_path/*.xml`
65
+ # Both of them will be added to the conn_str in the Connection class
66
+ if "?" in self._path:
67
+ self._conn_str = f"{self._path}&udf_path={self._udf_path}"
68
+ else:
69
+ self._conn_str = f"{self._path}?udf_path={self._udf_path}"
70
+ else:
71
+ self._udf_path = ""
72
+ self._conn_str = f"{self._path}"
73
+ self._conn = chdb_stateful.Connection(self._conn_str)
74
+ g_session = self
75
+ g_session_path = self._path
26
76
 
27
77
  def __del__(self):
28
- if self._cleanup:
29
- self.cleanup()
78
+ self.close()
30
79
 
31
80
  def __enter__(self):
32
81
  return self
33
82
 
34
83
  def __exit__(self, exc_type, exc_value, traceback):
35
- self.cleanup()
84
+ self.close()
85
+
86
+ def close(self):
87
+ if self._cleanup:
88
+ self.cleanup()
89
+ if self._conn is not None:
90
+ self._conn.close()
91
+ self._conn = None
92
+ global g_session, g_session_path
93
+ g_session = None
94
+ g_session_path = None
36
95
 
37
96
  def cleanup(self):
38
97
  try:
98
+ if self._conn is not None:
99
+ self._conn.close()
100
+ self._conn = None
39
101
  shutil.rmtree(self._path)
102
+ global g_session, g_session_path
103
+ g_session = None
104
+ g_session_path = None
40
105
  except: # noqa
41
106
  pass
42
107
 
@@ -44,7 +109,14 @@ class Session:
44
109
  """
45
110
  Execute a query.
46
111
  """
47
- return query(sql, fmt, path=self._path, udf_path=udf_path)
112
+ if fmt == "Debug":
113
+ warnings.warn(
114
+ """Debug format is not supported in Session.query
115
+ Please try use parameters in connection string instead:
116
+ Eg: conn = connect(f"db_path?verbose&log-level=test")"""
117
+ )
118
+ fmt = "CSV"
119
+ return self._conn.query(sql, fmt)
48
120
 
49
121
  # alias sql = query
50
122
  sql = query
chdb/state/sqlitelike.py CHANGED
@@ -21,7 +21,7 @@ class Connection:
21
21
  self._cursor = Cursor(self._conn)
22
22
  return self._cursor
23
23
 
24
- def query(self, query: str, format: str = "ArrowStream") -> Any:
24
+ def query(self, query: str, format: str = "CSV") -> Any:
25
25
  return self._conn.query(query, format)
26
26
 
27
27
  def close(self) -> None:
@@ -109,13 +109,14 @@ def connect(connection_string: str = ":memory:") -> Connection:
109
109
 
110
110
  Args:
111
111
  connection_string (str, optional): Connection string. Defaults to ":memory:".
112
- Aslo support file path like:
112
+ Also support file path like:
113
113
  - ":memory:" (for in-memory database)
114
114
  - "test.db" (for relative path)
115
115
  - "file:test.db" (same as above)
116
116
  - "/path/to/test.db" (for absolute path)
117
117
  - "file:/path/to/test.db" (same as above)
118
118
  - "file:test.db?param1=value1&param2=value2" (for relative path with query params)
119
+ - "file::memory:?verbose&log-level=test" (for in-memory database with query params)
119
120
  - "///path/to/test.db?param1=value1&param2=value2" (for absolute path)
120
121
 
121
122
  Connection string args handling:
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: chdb
3
- Version: 2.2.0b1
3
+ Version: 3.0.1
4
4
  Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
5
5
  Home-page: https://github.com/chdb-io/chdb
6
6
  Author: auxten
@@ -30,6 +30,8 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
30
30
  License-File: LICENSE.txt
31
31
  Requires-Dist: pyarrow>=13.0.0
32
32
  Requires-Dist: pandas>=2.0.0
33
+ Dynamic: requires-dist
34
+ Dynamic: requires-python
33
35
 
34
36
  <div align="center">
35
37
  <a href="https://clickhouse.com/blog/chdb-joins-clickhouse-family">📢 chDB joins the ClickHouse family 🐍+🚀</a>
@@ -66,7 +68,7 @@ Requires-Dist: pandas>=2.0.0
66
68
 
67
69
  ## Arch
68
70
  <div align="center">
69
- <img src="https://github.com/chdb-io/chdb/raw/main/docs/_static/arch-chdb2.png" width="450">
71
+ <img src="https://github.com/chdb-io/chdb/raw/main/docs/_static/arch-chdb3.png" width="450">
70
72
  </div>
71
73
 
72
74
  ## Get Started
@@ -1,28 +1,28 @@
1
- chdb/__init__.py,sha256=li1sCjhLx7HmRBbWtekahee1iQtJl5p69oGYIaVVplk,2994
1
+ chdb/__init__.py,sha256=6nRIwbUg4kHRY8E8EQzDv2wT-IyR0-tsr3Mu3n_Mi9Q,3762
2
2
  chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
3
- chdb/_chdb.cpython-310-darwin.so,sha256=s_5Oly4JVYMcgp7akK_HHg0BA48RYpPdAPo7ZSgHLr8,421932420
3
+ chdb/_chdb.cpython-310-darwin.so,sha256=dmUiv-V1v02Cw0SDC-XYe-fKxd8UPS-c0hUcNpJj_4c,422085000
4
4
  chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
5
5
  chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
6
6
  chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
7
7
  chdb/dbapi/__init__.py,sha256=aaNhxXNBC1ZkFr260cbGR8msOinTp0VoNTT_j8AXGUc,2205
8
8
  chdb/dbapi/connections.py,sha256=4RBO0h-B149xEicE8cXSSJl9wpXa4FQMY_4SghgEvCw,2762
9
- chdb/dbapi/converters.py,sha256=qS9k0Kzo_vDQxnFtsJ_3pLjlTrBK09SfdWcXdxzIrtI,7413
9
+ chdb/dbapi/converters.py,sha256=0SDqgixUTCz0LtWke_HHzgF1lFJhpsQrR_-ky3b-JRY,7447
10
10
  chdb/dbapi/cursors.py,sha256=OXF36raoyI3MIC5SCQ5IvnCtbOnppga4Q1IKOt2EIsk,7920
11
11
  chdb/dbapi/err.py,sha256=kUI9-A8LNqBoMoo4jh2NFsLCOLoPEwh9YIuz_qMoLoM,2017
12
12
  chdb/dbapi/times.py,sha256=_qXgDaYwsHntvpIKSKXp1rrYIgtq6Z9pLyLnO2XNoL0,360
13
13
  chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSAb60z8,370
14
14
  chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
16
- chdb/session/state.py,sha256=Hc9lJGLZ97Un7ElBbeWeCZoc9p_m1k_ojQbzJMPBPKg,1341
16
+ chdb/session/state.py,sha256=nx9KlqZyPTHAflToXCJVRBUSMjJFvyh6x2akP7Gc7h0,4360
17
17
  chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
18
- chdb/state/sqlitelike.py,sha256=RSVJW5M3hH_TUBcXiH7zna21WTSGbqkOccyEiFFd8PI,4425
18
+ chdb/state/sqlitelike.py,sha256=-2bQRs6WDREr90pe8UtaYbQK-BWSkko1Ma8cLSzLUf4,4511
19
19
  chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
20
20
  chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
21
21
  chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
22
22
  chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
23
23
  chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
24
- chdb-2.2.0b1.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
25
- chdb-2.2.0b1.dist-info/METADATA,sha256=cjvQmdOiS1pll_E2ohhXF2DVb7wdbVndY00xLuCnVi8,19444
26
- chdb-2.2.0b1.dist-info/WHEEL,sha256=mzY66v81iH1SQD4BkoxzJ6tEjPh3oKBwQCxVJDNXy4I,111
27
- chdb-2.2.0b1.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
28
- chdb-2.2.0b1.dist-info/RECORD,,
24
+ chdb-3.0.1.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
25
+ chdb-3.0.1.dist-info/METADATA,sha256=k9gmduudI-PNBH5QCOle-dKL6ascC9StyUnX4vz6gFs,19490
26
+ chdb-3.0.1.dist-info/WHEEL,sha256=HQquLNNRUomuzWueBNrqINFman-CGVesKQI-ZGwXyWQ,111
27
+ chdb-3.0.1.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
28
+ chdb-3.0.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp310-cp310-macosx_10_15_x86_64
5
5