pyseekdb 0.1.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,258 @@
1
+ """
2
+ OceanBase mode client - based on pymysql
3
+ """
4
+ import logging
5
+ from typing import Any, List, Optional, Sequence, Dict, Union
6
+
7
+ import pymysql
8
+ from pymysql.cursors import DictCursor
9
+
10
+ from .client_base import BaseClient
11
+ from .collection import Collection
12
+ from .database import Database
13
+ from .admin_client import DEFAULT_TENANT
14
+ from .query_result import QueryResult, QueryResultItem
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class OceanBaseServerClient(BaseClient):
    """OceanBase database client (based on pymysql, lazy connection).

    The constructor only records the connection settings. The pymysql
    connection is opened by ``_ensure_connection`` on first use and is
    re-opened whenever the cached connection is found closed.

    Collection management and the ``_collection_*`` operations are not
    overridden in this class; they come from ``BaseClient``.
    """

    # Statement prefixes whose rows execute() fetches and returns.  "DESC"
    # already matches "DESCRIBE"; both are listed for readability.
    _QUERY_PREFIXES = ("SELECT", "SHOW", "DESCRIBE", "DESC")

    def __init__(
        self,
        host: str = "localhost",
        port: int = 2881,
        tenant: str = "test",
        database: str = "test",
        user: str = "root",
        password: str = "",
        **kwargs
    ):
        """
        Initialize OceanBase client (no immediate connection)

        Args:
            host: OceanBase server address
            port: OceanBase server port (default 2881)
            tenant: tenant name
            database: database name
            user: username (without tenant suffix)
            password: password
            **kwargs: other pymysql connection parameters
        """
        self.host = host
        self.port = port
        self.tenant = tenant
        self.database = database
        self.user = user
        self.password = password
        self.kwargs = kwargs

        # OceanBase username format: user@tenant
        self.full_user = f"{user}@{tenant}"
        self._connection: Optional[pymysql.Connection] = None

        logger.info(
            "Initialize OceanBaseServerClient: %s@%s:%s/%s",
            self.full_user, self.host, self.port, self.database,
        )

    # ==================== Connection Management ====================

    def _ensure_connection(self) -> pymysql.Connection:
        """Return the cached connection, (re)connecting if it is missing or closed."""
        if self._connection is None or not self._connection.open:
            self._connection = pymysql.connect(
                host=self.host,
                port=self.port,
                user=self.full_user,  # OceanBase format: user@tenant
                password=self.password,
                database=self.database,
                cursorclass=DictCursor,
                **self.kwargs
            )
            logger.info(
                "✅ Connected to OceanBase: %s:%s/%s",
                self.host, self.port, self.database,
            )

        return self._connection

    def _cleanup(self):
        """Internal cleanup method: close the connection and forget it."""
        # Drop the reference first so the client never keeps a handle to a
        # connection whose close() failed half-way.
        conn, self._connection = self._connection, None
        if conn is None:
            return
        # pymysql raises when close() is called on an already-closed
        # connection, so only close one that is still open.
        if conn.open:
            conn.close()
        logger.info("Connection closed")

    def is_connected(self) -> bool:
        """Check connection status"""
        return self._connection is not None and self._connection.open

    def execute(self, sql: str) -> Any:
        """
        Execute one SQL statement on the (lazily opened) connection.

        Args:
            sql: complete SQL statement text

        Returns:
            The fetched rows when the statement starts with SELECT, SHOW,
            DESCRIBE or DESC (case-insensitive, leading whitespace ignored).
            For any other statement the transaction is committed and the
            cursor is returned; the ``with`` block has already closed it by
            the time the caller receives it.
        """
        conn = self._ensure_connection()

        with conn.cursor() as cursor:
            cursor.execute(sql)

            if sql.strip().upper().startswith(self._QUERY_PREFIXES):
                return cursor.fetchall()

            conn.commit()
            return cursor

    def get_raw_connection(self) -> pymysql.Connection:
        """Get raw connection object"""
        return self._ensure_connection()

    @property
    def mode(self) -> str:
        """Name identifying this client implementation."""
        return "OceanBaseServerClient"

    # ==================== Collection Management (framework) ====================
    # create_collection, get_collection, delete_collection, list_collections
    # and has_collection are inherited from BaseClient - no override needed.

    # ==================== Collection Internal Operations ====================
    # These methods are called by Collection objects.
    #
    # DML: _collection_add, _collection_update, _collection_upsert and
    #      _collection_delete are inherited from BaseClient.
    # DQL: _collection_query() and _collection_get() use the base class
    #      implementation; _collection_hybrid_search is inherited too.
    # Info: _collection_count is inherited from BaseClient.

    # ==================== Internal helpers ====================

    def _warn_on_foreign_tenant(self, tenant: str) -> None:
        """Log a warning when a caller names a tenant other than the client's."""
        if tenant != self.tenant and tenant != DEFAULT_TENANT:
            logger.warning(
                "Specified tenant '%s' differs from client tenant '%s', using client tenant",
                tenant, self.tenant,
            )

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Backtick-quote an identifier, doubling any embedded backtick."""
        return "`" + str(name).replace("`", "``") + "`"

    @staticmethod
    def _non_negative_int(value: Optional[int], label: str) -> Optional[int]:
        """Return ``value`` as a non-negative int (``None`` passes through).

        Raises:
            ValueError: if ``value`` is negative or is not an integer value.
        """
        if value is None:
            return None
        number = int(value)
        if number < 0:
            raise ValueError(f"{label} must be non-negative, got {value!r}")
        return number

    # ==================== Database Management ====================

    def create_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None:
        """
        Create database (OceanBase has tenant concept, uses client's tenant)

        Args:
            name: database name
            tenant: tenant name (if different from client tenant, will use client tenant)

        Note:
            OceanBase has multi-tenant architecture. Database is scoped to client's tenant.
        """
        self._warn_on_foreign_tenant(tenant)

        logger.info("Creating database: %s in tenant: %s", name, self.tenant)
        self.execute(f"CREATE DATABASE IF NOT EXISTS {self._quote_identifier(name)}")
        logger.info("✅ Database created: %s in tenant: %s", name, self.tenant)

    def get_database(self, name: str, tenant: str = DEFAULT_TENANT) -> Database:
        """
        Get database object (OceanBase has tenant concept, uses client's tenant)

        Args:
            name: database name
            tenant: tenant name (if different from client tenant, will use client tenant)

        Returns:
            Database object with tenant information

        Raises:
            ValueError: if no schema with that name exists

        Note:
            OceanBase has multi-tenant architecture. Database is scoped to client's tenant.
        """
        self._warn_on_foreign_tenant(tenant)

        logger.info("Getting database: %s in tenant: %s", name, self.tenant)
        sql = (
            "SELECT SCHEMA_NAME, DEFAULT_CHARACTER_SET_NAME, DEFAULT_COLLATION_NAME "
            "FROM information_schema.SCHEMATA WHERE SCHEMA_NAME = %s"
        )
        # The name is passed as a bound parameter so quotes or backslashes in
        # it cannot alter the statement.
        conn = self._ensure_connection()
        with conn.cursor() as cursor:
            cursor.execute(sql, (name,))
            result = cursor.fetchall()

        if not result:
            raise ValueError(f"Database not found: {name}")

        row = result[0]
        return Database(
            name=row['SCHEMA_NAME'],
            tenant=self.tenant,  # OceanBase has tenant concept
            charset=row['DEFAULT_CHARACTER_SET_NAME'],
            collation=row['DEFAULT_COLLATION_NAME']
        )

    def delete_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None:
        """
        Delete database (OceanBase has tenant concept, uses client's tenant)

        Args:
            name: database name
            tenant: tenant name (if different from client tenant, will use client tenant)

        Note:
            OceanBase has multi-tenant architecture. Database is scoped to client's tenant.
        """
        self._warn_on_foreign_tenant(tenant)

        logger.info("Deleting database: %s in tenant: %s", name, self.tenant)
        self.execute(f"DROP DATABASE IF EXISTS {self._quote_identifier(name)}")
        logger.info("✅ Database deleted: %s in tenant: %s", name, self.tenant)

    def list_databases(
        self,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        tenant: str = DEFAULT_TENANT
    ) -> Sequence[Database]:
        """
        List all databases (OceanBase has tenant concept, uses client's tenant)

        Args:
            limit: maximum number of results to return
            offset: number of results to skip
            tenant: tenant name (if different from client tenant, will use client tenant)

        Returns:
            Sequence of Database objects with tenant information

        Raises:
            ValueError: if ``limit`` or ``offset`` is negative or not an integer

        Note:
            OceanBase has multi-tenant architecture. Lists databases in client's tenant.
        """
        self._warn_on_foreign_tenant(tenant)

        # Validate before interpolating: only plain non-negative integers may
        # reach the SQL text.
        limit = self._non_negative_int(limit, "limit")
        offset = self._non_negative_int(offset, "offset")

        logger.info("Listing databases in tenant: %s", self.tenant)
        sql = "SELECT SCHEMA_NAME, DEFAULT_CHARACTER_SET_NAME, DEFAULT_COLLATION_NAME FROM information_schema.SCHEMATA"

        if limit is not None:
            if offset is not None:
                sql += f" LIMIT {offset}, {limit}"
            else:
                sql += f" LIMIT {limit}"

        rows = self.execute(sql)

        # An offset without a limit cannot be expressed as a LIMIT clause
        # here, so the leading rows are skipped client-side.
        if limit is None and offset:
            rows = list(rows)[offset:]

        databases = [
            Database(
                name=row['SCHEMA_NAME'],
                tenant=self.tenant,  # OceanBase has tenant concept
                charset=row['DEFAULT_CHARACTER_SET_NAME'],
                collation=row['DEFAULT_COLLATION_NAME']
            )
            for row in rows
        ]

        logger.info("✅ Found %s databases in tenant %s", len(databases), self.tenant)
        return databases

    def __repr__(self):
        status = "connected" if self.is_connected() else "disconnected"
        return f"<OceanBaseServerClient {self.full_user}@{self.host}:{self.port}/{self.database} status={status}>"
@@ -0,0 +1,324 @@
1
+ """
2
+ Embedded mode client - based on seekdb
3
+ """
4
+ import os
5
+ import logging
6
+ from typing import Any, List, Optional, Sequence, Dict, Union
7
+
8
+ import seekdb # type: ignore
9
+
10
+ from .client_base import BaseClient
11
+ from .collection import Collection
12
+ from .database import Database
13
+ from .admin_client import DEFAULT_TENANT
14
+ from .query_result import QueryResult, QueryResultItem
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class SeekdbEmbeddedClient(BaseClient):
    """Embedded SeekDB client (lazy connection).

    The constructor only records the settings. ``_ensure_connection`` opens
    the seekdb data directory and creates the connection on first use.

    Collection management and the ``_collection_*`` operations are not
    overridden in this class; they come from ``BaseClient``.
    """

    # Statement prefixes whose rows execute() fetches and returns.  "DESC"
    # already matches "DESCRIBE"; both are listed for readability.
    _QUERY_PREFIXES = ("SELECT", "SHOW", "DESCRIBE", "DESC")

    def __init__(
        self,
        path: str = "./seekdb",
        database: str = "test",
        autocommit: bool = False,
        **kwargs
    ):
        """
        Initialize embedded client (no immediate connection)

        Args:
            path: seekdb data directory path
            database: database name
            autocommit: whether to auto-commit
            **kwargs: accepted for signature compatibility; not used by this client
        """
        self.path = os.path.abspath(path)
        self.database = database
        self.autocommit = autocommit
        self._connection = None
        self._initialized = False

        logger.info(
            "Initialize SeekdbEmbeddedClient: path=%s, database=%s",
            self.path, self.database,
        )

    # ==================== Connection Management ====================

    def _ensure_connection(self) -> Any:  # seekdb.Connection
        """Ensure connection is established (internal method)"""
        if not self._initialized:

            # 1. Open seekdb.  An "initialized twice" error is treated as
            #    "already open" and ignored; anything else propagates.
            try:
                seekdb.open(db_dir=self.path)  # type: ignore
                logger.info("✅ SeekDB opened: %s", self.path)
            except Exception as e:
                if "initialized twice" not in str(e):
                    raise
                logger.debug("SeekDB already opened: %s", e)

            self._initialized = True

        # 2. Create connection
        if self._connection is None:
            self._connection = seekdb.connect(  # type: ignore
                database=self.database,
                autocommit=self.autocommit
            )
            logger.info("✅ Connected to database: %s", self.database)

        return self._connection

    def _cleanup(self):
        """Internal cleanup method: close the connection and forget it."""
        # Drop the reference first so a failing close() cannot leave the
        # client holding a half-closed connection.
        conn, self._connection = self._connection, None
        if conn is not None:
            conn.close()
            logger.info("Connection closed")

    def is_connected(self) -> bool:
        """Check connection status"""
        return self._connection is not None and self._initialized

    def execute(self, sql: str) -> Any:
        """
        Execute one SQL statement on the (lazily opened) connection.

        Args:
            sql: complete SQL statement text

        Returns:
            The fetched rows when the statement starts with SELECT, SHOW,
            DESCRIBE or DESC (case-insensitive, leading whitespace ignored).
            Otherwise the statement is committed (unless ``autocommit`` is
            set) and the still-open cursor is returned.

        Raises:
            Exception: whatever the driver raised; when ``autocommit`` is
                off the transaction is rolled back before re-raising.
        """
        conn = self._ensure_connection()
        cursor = conn.cursor()
        hand_cursor_to_caller = False

        try:
            cursor.execute(sql)

            if sql.strip().upper().startswith(self._QUERY_PREFIXES):
                return cursor.fetchall()

            if not self.autocommit:
                conn.commit()

            hand_cursor_to_caller = True
            return cursor
        except Exception:
            if not self.autocommit:
                conn.rollback()
            raise
        finally:
            # Only the non-query success path returns the cursor; on every
            # other path nobody else can close it, so close it here.
            if not hand_cursor_to_caller:
                cursor.close()

    def get_raw_connection(self) -> Any:  # seekdb.Connection
        """Get raw connection object"""
        return self._ensure_connection()

    @property
    def mode(self) -> str:
        """Name identifying this client implementation."""
        return "SeekdbEmbeddedClient"

    def _use_context_manager_for_cursor(self) -> bool:
        """
        Override to use try/finally instead of context manager for cursor
        (SeekDB embedded client doesn't support context manager)
        """
        return False

    # ==================== Internal helpers ====================

    @staticmethod
    def _sql_literal(param: Any) -> str:
        """Render one query parameter as SQL literal text.

        ``None`` becomes ``NULL``, ints and floats are written bare, and any
        other value is converted with ``str()`` and single-quoted with
        embedded single quotes doubled.
        """
        if param is None:
            return 'NULL'
        if isinstance(param, (int, float)):
            return str(param)
        # NOTE(review): only single quotes are escaped. If the engine treats
        # backslash as an escape character inside string literals, values
        # containing backslashes would need escaping too - confirm.
        return "'" + str(param).replace("'", "''") + "'"

    @classmethod
    def _embed_params(cls, sql: str, params: Sequence[Any]) -> str:
        """Substitute ``%s`` placeholders in ``sql`` with rendered ``params``.

        The template is split once on the placeholders and the literals are
        interleaved, so a ``%s`` inside a parameter value is never mistaken
        for a placeholder. Placeholders without a parameter stay as ``%s``;
        surplus parameters are ignored.
        """
        literals = [cls._sql_literal(param) for param in params]
        pieces = sql.split('%s', len(literals))
        assembled = [pieces[0]]
        for literal, tail in zip(literals, pieces[1:]):
            assembled.append(literal)
            assembled.append(tail)
        return ''.join(assembled)

    @staticmethod
    def _column_names_from_select(sql: str) -> Optional[List[str]]:
        """Best-effort extraction of result column names from a SELECT statement.

        Takes the text between the first ``SELECT`` and the first following
        ``FROM``, splits it on commas that are not inside parentheses, and
        uses each expression's trailing ``AS alias`` (or, without one, its
        last whitespace-separated token with backticks removed).

        Returns:
            The list of names, or ``None`` when no ``SELECT ... FROM`` is found.
        """
        import re

        select_match = re.search(r'SELECT\s+(.+?)\s+FROM', sql, re.IGNORECASE | re.DOTALL)
        if not select_match:
            return None

        # Split by comma, but skip commas inside parentheses (function calls)
        parts = []
        depth = 0
        current = ""
        for char in select_match.group(1).strip():
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
            elif char == ',' and depth == 0:
                parts.append(current.strip())
                current = ""
                continue
            current += char
        if current:
            parts.append(current.strip())

        column_names = []
        for part in parts:
            # Anchor at the end so an inner "AS" (e.g. CAST(x AS CHAR) AS y)
            # is not taken for the column alias.
            as_match = re.search(r'\s+AS\s+`?(\w+)`?\s*$', part, re.IGNORECASE)
            if as_match:
                column_names.append(as_match.group(1))
            else:
                # No alias: remove backticks and keep the last token
                column_names.append(part.replace('`', '').strip().split()[-1])
        return column_names

    @staticmethod
    def _non_negative_int(value: Optional[int], label: str) -> Optional[int]:
        """Return ``value`` as a non-negative int (``None`` passes through).

        Raises:
            ValueError: if ``value`` is negative or is not an integer value.
        """
        if value is None:
            return None
        number = int(value)
        if number < 0:
            raise ValueError(f"{label} must be non-negative, got {value!r}")
        return number

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Backtick-quote an identifier, doubling any embedded backtick."""
        return "`" + str(name).replace("`", "``") + "`"

    @staticmethod
    def _database_from_row(row: Any) -> Database:
        """Build a Database from one SCHEMATA row (positional or mapping)."""
        if isinstance(row, (tuple, list)):
            name, charset, collation = row[0], row[1], row[2]
        else:
            name = row.get('SCHEMA_NAME')
            charset = row.get('DEFAULT_CHARACTER_SET_NAME')
            collation = row.get('DEFAULT_COLLATION_NAME')
        return Database(
            name=name,
            tenant=None,  # No tenant concept in embedded mode
            charset=charset,
            collation=collation
        )

    def _execute_query_with_cursor(
        self,
        conn: Any,
        sql: str,
        params: List[Any],
        use_context_manager: bool = True
    ) -> List[Dict[str, Any]]:
        """
        Execute SQL query and return normalized rows
        Override base class to handle pyseekdb cursor which doesn't support parameterized queries

        Args:
            conn: Database connection
            sql: SQL query string with %s placeholders
            params: Query parameters to embed in SQL (``None`` is treated as empty)
            use_context_manager: Whether to use context manager (ignored for embedded client)

        Returns:
            List of normalized row dictionaries
        """
        # pyseekdb.Cursor.execute() only accepts SQL string, not parameters,
        # so the parameters are rendered into the statement text.
        embedded_sql = self._embed_params(sql, params or [])

        cursor = conn.cursor()
        try:
            cursor.execute(embedded_sql)
            rows = cursor.fetchall()

            # When the cursor exposes no description, derive column names
            # from the SELECT clause.  The template (not the embedded SQL) is
            # parsed so parameter values cannot disturb the comma/paren scan.
            cursor_description = getattr(cursor, 'description', None)
            if cursor_description is None and rows:
                column_names = self._column_names_from_select(sql)
                if column_names is not None:
                    cursor_description = [(name,) for name in column_names]

            return [self._normalize_row(row, cursor_description) for row in rows]
        finally:
            cursor.close()

    # ==================== Collection Management (framework) ====================
    # create_collection, get_collection, delete_collection, list_collections
    # and has_collection are inherited from BaseClient - no override needed.

    # ==================== Collection Internal Operations ====================
    # These methods are called by Collection objects.
    #
    # DML: _collection_add, _collection_update, _collection_upsert and
    #      _collection_delete are inherited from BaseClient.
    # DQL: _collection_query() and _collection_get() use the base class
    #      implementation; _collection_hybrid_search is inherited too.
    # Info: _collection_count is inherited from BaseClient.

    # ==================== Database Management ====================

    def create_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None:
        """
        Create database (tenant parameter ignored for embedded mode)

        Args:
            name: database name
            tenant: ignored for embedded mode (no tenant concept)
        """
        logger.info("Creating database: %s", name)
        self.execute(f"CREATE DATABASE IF NOT EXISTS {self._quote_identifier(name)}")
        logger.info("✅ Database created: %s", name)

    def get_database(self, name: str, tenant: str = DEFAULT_TENANT) -> Database:
        """
        Get database object (tenant parameter ignored for embedded mode)

        Args:
            name: database name
            tenant: ignored for embedded mode (no tenant concept)

        Returns:
            Database object with ``tenant`` set to ``None``

        Raises:
            ValueError: if no schema with that name exists
        """
        logger.info("Getting database: %s", name)
        # The cursor takes no bound parameters, so the name is rendered as an
        # escaped literal instead of being interpolated raw.
        sql = (
            "SELECT SCHEMA_NAME, DEFAULT_CHARACTER_SET_NAME, DEFAULT_COLLATION_NAME "
            "FROM information_schema.SCHEMATA WHERE SCHEMA_NAME = "
            + self._sql_literal(str(name))
        )
        result = self.execute(sql)

        if not result:
            raise ValueError(f"Database not found: {name}")

        return self._database_from_row(result[0])

    def delete_database(self, name: str, tenant: str = DEFAULT_TENANT) -> None:
        """
        Delete database (tenant parameter ignored for embedded mode)

        Args:
            name: database name
            tenant: ignored for embedded mode (no tenant concept)
        """
        logger.info("Deleting database: %s", name)
        self.execute(f"DROP DATABASE IF EXISTS {self._quote_identifier(name)}")
        logger.info("✅ Database deleted: %s", name)

    def list_databases(
        self,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        tenant: str = DEFAULT_TENANT
    ) -> Sequence[Database]:
        """
        List all databases (tenant parameter ignored for embedded mode)

        Args:
            limit: maximum number of results to return
            offset: number of results to skip
            tenant: ignored for embedded mode (no tenant concept)

        Returns:
            Sequence of Database objects with ``tenant`` set to ``None``

        Raises:
            ValueError: if ``limit`` or ``offset`` is negative or not an integer
        """
        # Validate before interpolating: only plain non-negative integers may
        # reach the SQL text.
        limit = self._non_negative_int(limit, "limit")
        offset = self._non_negative_int(offset, "offset")

        logger.info("Listing databases")
        sql = "SELECT SCHEMA_NAME, DEFAULT_CHARACTER_SET_NAME, DEFAULT_COLLATION_NAME FROM information_schema.SCHEMATA"

        if limit is not None:
            if offset is not None:
                sql += f" LIMIT {offset}, {limit}"
            else:
                sql += f" LIMIT {limit}"

        rows = self.execute(sql)

        # An offset without a limit cannot be expressed as a LIMIT clause
        # here, so the leading rows are skipped client-side.
        if limit is None and offset:
            rows = list(rows)[offset:]

        databases = [self._database_from_row(row) for row in rows]

        logger.info("✅ Found %s databases", len(databases))
        return databases

    def __repr__(self):
        status = "connected" if self.is_connected() else "disconnected"
        return f"<SeekdbEmbeddedClient path={self.path} database={self.database} status={status}>"