commitdb 2.1.0__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {commitdb-2.1.0 → commitdb-2.2.0}/PKG-INFO +29 -1
- {commitdb-2.1.0 → commitdb-2.2.0}/README.md +25 -0
- {commitdb-2.1.0 → commitdb-2.2.0}/commitdb/__init__.py +9 -2
- {commitdb-2.1.0 → commitdb-2.2.0}/commitdb/client.py +66 -6
- commitdb-2.2.0/commitdb/ibis_backend.py +490 -0
- {commitdb-2.1.0 → commitdb-2.2.0}/commitdb/lib/libcommitdb-linux-amd64.so +0 -0
- {commitdb-2.1.0 → commitdb-2.2.0}/commitdb.egg-info/PKG-INFO +29 -1
- {commitdb-2.1.0 → commitdb-2.2.0}/commitdb.egg-info/SOURCES.txt +4 -1
- commitdb-2.2.0/commitdb.egg-info/entry_points.txt +2 -0
- commitdb-2.2.0/commitdb.egg-info/requires.txt +7 -0
- {commitdb-2.1.0 → commitdb-2.2.0}/pyproject.toml +5 -1
- {commitdb-2.1.0 → commitdb-2.2.0}/tests/test_client.py +4 -4
- commitdb-2.2.0/tests/test_ibis.py +88 -0
- commitdb-2.1.0/commitdb.egg-info/requires.txt +0 -3
- {commitdb-2.1.0 → commitdb-2.2.0}/commitdb/binding.py +0 -0
- {commitdb-2.1.0 → commitdb-2.2.0}/commitdb.egg-info/dependency_links.txt +0 -0
- {commitdb-2.1.0 → commitdb-2.2.0}/commitdb.egg-info/top_level.txt +0 -0
- {commitdb-2.1.0 → commitdb-2.2.0}/setup.cfg +0 -0
- {commitdb-2.1.0 → commitdb-2.2.0}/tests/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: commitdb
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: Python driver for CommitDB SQL Server
|
|
5
5
|
Author: CommitDB Contributors
|
|
6
6
|
License: Apache-2.0
|
|
@@ -19,6 +19,9 @@ Requires-Python: >=3.10
|
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
Provides-Extra: dev
|
|
21
21
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
22
|
+
Provides-Extra: ibis
|
|
23
|
+
Requires-Dist: ibis-framework>=9.0; extra == "ibis"
|
|
24
|
+
Requires-Dist: pandas>=2.0; extra == "ibis"
|
|
22
25
|
|
|
23
26
|
# CommitDB Python Driver
|
|
24
27
|
|
|
@@ -65,6 +68,31 @@ with CommitDBLocal('/path/to/data') as db: # File-based (persistent)
|
|
|
65
68
|
db.execute('CREATE DATABASE mydb')
|
|
66
69
|
```
|
|
67
70
|
|
|
71
|
+
### Ibis Mode (pandas DataFrame support)
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install commitdb[ibis]
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import ibis
|
|
79
|
+
|
|
80
|
+
con = ibis.commitdb.connect('localhost', 3306, database='mydb')
|
|
81
|
+
|
|
82
|
+
# Or use URL-based connection:
|
|
83
|
+
con = ibis.connect('commitdb://localhost:3306/mydb')
|
|
84
|
+
|
|
85
|
+
# Query with Ibis expressions
|
|
86
|
+
users = con.table('users')
|
|
87
|
+
result = users.filter(users.age > 30).select('name', 'city').execute() # → pandas DataFrame
|
|
88
|
+
print(result)
|
|
89
|
+
|
|
90
|
+
# Insert from DataFrame
|
|
91
|
+
import pandas as pd
|
|
92
|
+
df = pd.DataFrame({'id': [1, 2], 'name': ['Alice', 'Bob']})
|
|
93
|
+
con.insert('users', df)
|
|
94
|
+
```
|
|
95
|
+
|
|
68
96
|
---
|
|
69
97
|
|
|
70
98
|
## API Reference
|
|
@@ -43,6 +43,31 @@ with CommitDBLocal('/path/to/data') as db: # File-based (persistent)
|
|
|
43
43
|
db.execute('CREATE DATABASE mydb')
|
|
44
44
|
```
|
|
45
45
|
|
|
46
|
+
### Ibis Mode (pandas DataFrame support)
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install commitdb[ibis]
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import ibis
|
|
54
|
+
|
|
55
|
+
con = ibis.commitdb.connect('localhost', 3306, database='mydb')
|
|
56
|
+
|
|
57
|
+
# Or use URL-based connection:
|
|
58
|
+
con = ibis.connect('commitdb://localhost:3306/mydb')
|
|
59
|
+
|
|
60
|
+
# Query with Ibis expressions
|
|
61
|
+
users = con.table('users')
|
|
62
|
+
result = users.filter(users.age > 30).select('name', 'city').execute() # → pandas DataFrame
|
|
63
|
+
print(result)
|
|
64
|
+
|
|
65
|
+
# Insert from DataFrame
|
|
66
|
+
import pandas as pd
|
|
67
|
+
df = pd.DataFrame({'id': [1, 2], 'name': ['Alice', 'Bob']})
|
|
68
|
+
con.insert('users', df)
|
|
69
|
+
```
|
|
70
|
+
|
|
46
71
|
---
|
|
47
72
|
|
|
48
73
|
## API Reference
|
|
@@ -30,7 +30,14 @@ Embedded mode (requires libcommitdb):
|
|
|
30
30
|
db.execute('CREATE DATABASE mydb')
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
|
+
from importlib.metadata import version, PackageNotFoundError
|
|
34
|
+
|
|
33
35
|
from .client import CommitDB, CommitDBLocal, QueryResult, CommitResult, CommitDBError
|
|
34
36
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
+
try:
|
|
38
|
+
__version__ = version("commitdb")
|
|
39
|
+
except PackageNotFoundError:
|
|
40
|
+
__version__ = "0.0.0" # Fallback for development/editable installs
|
|
41
|
+
|
|
42
|
+
__all__ = ['CommitDB', 'CommitDBLocal', 'QueryResult', 'CommitResult', 'CommitDBError', '__version__']
|
|
43
|
+
|
|
@@ -20,7 +20,8 @@ class QueryResult:
|
|
|
20
20
|
columns: list[str]
|
|
21
21
|
data: list[list[str]]
|
|
22
22
|
records_read: int
|
|
23
|
-
|
|
23
|
+
execution_time_ms: float
|
|
24
|
+
execution_ops: int = 0
|
|
24
25
|
|
|
25
26
|
def __iter__(self) -> Iterator[dict[str, str]]:
|
|
26
27
|
"""Iterate over rows as dictionaries."""
|
|
@@ -43,7 +44,8 @@ class CommitResult:
|
|
|
43
44
|
tables_deleted: int = 0
|
|
44
45
|
records_written: int = 0
|
|
45
46
|
records_deleted: int = 0
|
|
46
|
-
|
|
47
|
+
execution_time_ms: float = 0.0
|
|
48
|
+
execution_ops: int = 0
|
|
47
49
|
|
|
48
50
|
@property
|
|
49
51
|
def affected_rows(self) -> int:
|
|
@@ -236,7 +238,8 @@ class CommitDB:
|
|
|
236
238
|
columns=result_data.get('columns', []),
|
|
237
239
|
data=result_data.get('data', []),
|
|
238
240
|
records_read=result_data.get('records_read', 0),
|
|
239
|
-
|
|
241
|
+
execution_time_ms=result_data.get('execution_time_ms', 0.0),
|
|
242
|
+
execution_ops=result_data.get('execution_ops', 0)
|
|
240
243
|
)
|
|
241
244
|
elif result_type == 'commit':
|
|
242
245
|
return CommitResult(
|
|
@@ -246,7 +249,8 @@ class CommitDB:
|
|
|
246
249
|
tables_deleted=result_data.get('tables_deleted', 0),
|
|
247
250
|
records_written=result_data.get('records_written', 0),
|
|
248
251
|
records_deleted=result_data.get('records_deleted', 0),
|
|
249
|
-
|
|
252
|
+
execution_time_ms=result_data.get('execution_time_ms', 0.0),
|
|
253
|
+
execution_ops=result_data.get('execution_ops', 0)
|
|
250
254
|
)
|
|
251
255
|
else:
|
|
252
256
|
# Unknown type, return empty commit result
|
|
@@ -325,6 +329,60 @@ class CommitDB:
|
|
|
325
329
|
result = self.query(f'SHOW TABLES IN {database}')
|
|
326
330
|
return [row[0] for row in result.data] if result.data else []
|
|
327
331
|
|
|
332
|
+
def create_share(self, name: str, url: str, token: str = None,
|
|
333
|
+
ssh_key: str = None, passphrase: str = None) -> CommitResult:
|
|
334
|
+
"""
|
|
335
|
+
Create a share from an external Git repository.
|
|
336
|
+
|
|
337
|
+
Args:
|
|
338
|
+
name: Share name
|
|
339
|
+
url: Git repository URL
|
|
340
|
+
token: Optional authentication token (for HTTPS)
|
|
341
|
+
ssh_key: Optional path to SSH private key
|
|
342
|
+
passphrase: Optional passphrase for SSH key
|
|
343
|
+
|
|
344
|
+
Example:
|
|
345
|
+
db.create_share('sample', 'https://github.com/org/data.git')
|
|
346
|
+
db.create_share('private', 'git@github.com:org/data.git', ssh_key='~/.ssh/id_rsa')
|
|
347
|
+
"""
|
|
348
|
+
query = f"CREATE SHARE {name} FROM '{url}'"
|
|
349
|
+
if token:
|
|
350
|
+
query += f" WITH TOKEN '{token}'"
|
|
351
|
+
elif ssh_key:
|
|
352
|
+
query += f" WITH SSH KEY '{ssh_key}'"
|
|
353
|
+
if passphrase:
|
|
354
|
+
query += f" PASSPHRASE '{passphrase}'"
|
|
355
|
+
return self.execute(query)
|
|
356
|
+
|
|
357
|
+
def sync_share(self, name: str, token: str = None,
|
|
358
|
+
ssh_key: str = None, passphrase: str = None) -> CommitResult:
|
|
359
|
+
"""
|
|
360
|
+
Synchronize a share with its remote repository.
|
|
361
|
+
|
|
362
|
+
Args:
|
|
363
|
+
name: Share name
|
|
364
|
+
token: Optional authentication token (for HTTPS)
|
|
365
|
+
ssh_key: Optional path to SSH private key
|
|
366
|
+
passphrase: Optional passphrase for SSH key
|
|
367
|
+
"""
|
|
368
|
+
query = f"SYNC SHARE {name}"
|
|
369
|
+
if token:
|
|
370
|
+
query += f" WITH TOKEN '{token}'"
|
|
371
|
+
elif ssh_key:
|
|
372
|
+
query += f" WITH SSH KEY '{ssh_key}'"
|
|
373
|
+
if passphrase:
|
|
374
|
+
query += f" PASSPHRASE '{passphrase}'"
|
|
375
|
+
return self.execute(query)
|
|
376
|
+
|
|
377
|
+
def drop_share(self, name: str) -> CommitResult:
|
|
378
|
+
"""Drop a share."""
|
|
379
|
+
return self.execute(f"DROP SHARE {name}")
|
|
380
|
+
|
|
381
|
+
def show_shares(self) -> list[dict[str, str]]:
|
|
382
|
+
"""List all shares."""
|
|
383
|
+
result = self.query('SHOW SHARES')
|
|
384
|
+
return [{'name': row[0], 'url': row[1]} for row in result.data] if result.data else []
|
|
385
|
+
|
|
328
386
|
|
|
329
387
|
class CommitDBLocal:
|
|
330
388
|
"""
|
|
@@ -395,7 +453,8 @@ class CommitDBLocal:
|
|
|
395
453
|
columns=result_data.get('columns', []),
|
|
396
454
|
data=result_data.get('data', []),
|
|
397
455
|
records_read=result_data.get('records_read', 0),
|
|
398
|
-
|
|
456
|
+
execution_time_ms=result_data.get('execution_time_ms', 0.0),
|
|
457
|
+
execution_ops=result_data.get('execution_ops', 0)
|
|
399
458
|
)
|
|
400
459
|
elif result_type == 'commit':
|
|
401
460
|
return CommitResult(
|
|
@@ -405,7 +464,8 @@ class CommitDBLocal:
|
|
|
405
464
|
tables_deleted=result_data.get('tables_deleted', 0),
|
|
406
465
|
records_written=result_data.get('records_written', 0),
|
|
407
466
|
records_deleted=result_data.get('records_deleted', 0),
|
|
408
|
-
|
|
467
|
+
execution_time_ms=result_data.get('execution_time_ms', 0.0),
|
|
468
|
+
execution_ops=result_data.get('execution_ops', 0)
|
|
409
469
|
)
|
|
410
470
|
else:
|
|
411
471
|
return CommitResult()
|
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CommitDB Ibis Backend
|
|
3
|
+
|
|
4
|
+
Provides an Ibis backend for CommitDB, enabling pandas DataFrame support
|
|
5
|
+
and lazy expression evaluation.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
import ibis
|
|
9
|
+
|
|
10
|
+
# Connect using ibis.commitdb
|
|
11
|
+
con = ibis.commitdb.connect('localhost', 3306, database='mydb')
|
|
12
|
+
|
|
13
|
+
# Or use URL-based connection
|
|
14
|
+
con = ibis.connect('commitdb://localhost:3306/mydb')
|
|
15
|
+
|
|
16
|
+
# Query with Ibis expressions
|
|
17
|
+
users = con.table('users')
|
|
18
|
+
result = users.filter(users.age > 30).execute() # Returns pandas DataFrame
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import contextlib
|
|
24
|
+
from typing import TYPE_CHECKING, Any
|
|
25
|
+
|
|
26
|
+
import ibis.expr.datatypes as dt
|
|
27
|
+
import ibis.expr.schema as sch
|
|
28
|
+
import ibis.expr.types as ir
|
|
29
|
+
from ibis.backends.sql import SQLBackend
|
|
30
|
+
from ibis.backends.sql.compilers import SQLiteCompiler
|
|
31
|
+
|
|
32
|
+
from . import __version__
|
|
33
|
+
|
|
34
|
+
from .client import CommitDB as CommitDBClient, CommitDBError
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
import pandas as pd
|
|
38
|
+
import sqlglot as sg
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Type mapping from CommitDB types to Ibis types
|
|
42
|
+
# Based on: STRING, INT, INTEGER, FLOAT, DOUBLE, REAL, BOOL, BOOLEAN, TEXT, DATE, TIMESTAMP, DATETIME, JSON
|
|
43
|
+
COMMITDB_TYPE_MAP = {
|
|
44
|
+
# String types
|
|
45
|
+
"STRING": dt.String,
|
|
46
|
+
"TEXT": dt.String,
|
|
47
|
+
# Integer types
|
|
48
|
+
"INT": dt.Int64,
|
|
49
|
+
"INTEGER": dt.Int64,
|
|
50
|
+
# Float types
|
|
51
|
+
"FLOAT": dt.Float64,
|
|
52
|
+
"DOUBLE": dt.Float64,
|
|
53
|
+
"REAL": dt.Float64,
|
|
54
|
+
# Boolean types
|
|
55
|
+
"BOOL": dt.Boolean,
|
|
56
|
+
"BOOLEAN": dt.Boolean,
|
|
57
|
+
# Date/time types
|
|
58
|
+
"DATE": dt.Date,
|
|
59
|
+
"TIMESTAMP": dt.Timestamp,
|
|
60
|
+
"DATETIME": dt.Timestamp,
|
|
61
|
+
# JSON type
|
|
62
|
+
"JSON": dt.JSON,
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class CommitDBCompiler(SQLiteCompiler):
|
|
67
|
+
"""SQL compiler for CommitDB.
|
|
68
|
+
|
|
69
|
+
CommitDB's SQL is similar enough to SQLite that we can reuse most of it.
|
|
70
|
+
"""
|
|
71
|
+
__slots__ = ()
|
|
72
|
+
dialect = "sqlite" # Use SQLite dialect for sqlglot
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class Backend(SQLBackend):
|
|
76
|
+
"""Ibis backend for CommitDB.
|
|
77
|
+
|
|
78
|
+
This backend connects to a CommitDB server and executes queries,
|
|
79
|
+
returning results as pandas DataFrames.
|
|
80
|
+
"""
|
|
81
|
+
|
|
82
|
+
name = "commitdb"
|
|
83
|
+
compiler = CommitDBCompiler
|
|
84
|
+
supports_temporary_tables = False
|
|
85
|
+
supports_python_udfs = False
|
|
86
|
+
|
|
87
|
+
@classmethod
|
|
88
|
+
def _from_url(cls, url, **kwargs) -> "Backend":
|
|
89
|
+
"""Create a backend from a URL.
|
|
90
|
+
|
|
91
|
+
URL format: commitdb://host:port/database
|
|
92
|
+
|
|
93
|
+
Examples
|
|
94
|
+
--------
|
|
95
|
+
>>> ibis.connect("commitdb://localhost:3306/mydb")
|
|
96
|
+
"""
|
|
97
|
+
from urllib.parse import urlparse, ParseResult
|
|
98
|
+
|
|
99
|
+
# Handle both string URLs and pre-parsed ParseResult
|
|
100
|
+
if isinstance(url, str):
|
|
101
|
+
parsed = urlparse(url)
|
|
102
|
+
else:
|
|
103
|
+
parsed = url # Already a ParseResult from ibis.connect()
|
|
104
|
+
|
|
105
|
+
host = parsed.hostname or "localhost"
|
|
106
|
+
port = parsed.port or 3306
|
|
107
|
+
database = parsed.path.lstrip("/") or None
|
|
108
|
+
|
|
109
|
+
# Parse query params for additional options
|
|
110
|
+
if parsed.query:
|
|
111
|
+
from urllib.parse import parse_qs
|
|
112
|
+
params = parse_qs(parsed.query)
|
|
113
|
+
for key, values in params.items():
|
|
114
|
+
if key not in kwargs:
|
|
115
|
+
kwargs[key] = values[0] if len(values) == 1 else values
|
|
116
|
+
|
|
117
|
+
backend = cls()
|
|
118
|
+
backend.do_connect(host=host, port=port, database=database, **kwargs)
|
|
119
|
+
return backend
|
|
120
|
+
|
|
121
|
+
def __init__(self):
|
|
122
|
+
super().__init__()
|
|
123
|
+
self._client: CommitDBClient | None = None
|
|
124
|
+
self._current_database: str | None = None
|
|
125
|
+
self._in_memory_tables: dict[str, Any] = {}
|
|
126
|
+
|
|
127
|
+
def _get_schema_using_query(self, query: str) -> sch.Schema:
|
|
128
|
+
"""Get schema by executing a query with LIMIT 1.
|
|
129
|
+
|
|
130
|
+
Note: CommitDB doesn't support subqueries, so we execute the query
|
|
131
|
+
directly with a limit to infer the schema from results.
|
|
132
|
+
"""
|
|
133
|
+
client = self._ensure_connected()
|
|
134
|
+
|
|
135
|
+
# Add LIMIT 1 if not already present to minimize data transfer
|
|
136
|
+
query_upper = query.strip().upper()
|
|
137
|
+
if "LIMIT" not in query_upper:
|
|
138
|
+
query = f"{query.rstrip().rstrip(';')} LIMIT 1"
|
|
139
|
+
|
|
140
|
+
try:
|
|
141
|
+
result = client.query(query)
|
|
142
|
+
# Infer schema from column names (all strings for now since
|
|
143
|
+
# CommitDB returns string values over the wire)
|
|
144
|
+
fields = {col: dt.String(nullable=True) for col in result.columns}
|
|
145
|
+
return sch.Schema(fields)
|
|
146
|
+
except Exception:
|
|
147
|
+
# If query fails, return empty schema
|
|
148
|
+
return sch.Schema({})
|
|
149
|
+
|
|
150
|
+
def _register_in_memory_table(self, op: Any) -> None:
|
|
151
|
+
"""Register a table for in-memory operations.
|
|
152
|
+
|
|
153
|
+
CommitDB doesn't support in-memory tables, so we store them locally.
|
|
154
|
+
"""
|
|
155
|
+
# Store the table data for later insertion
|
|
156
|
+
name = op.name
|
|
157
|
+
self._in_memory_tables[name] = op
|
|
158
|
+
|
|
159
|
+
@property
|
|
160
|
+
def version(self) -> str:
|
|
161
|
+
"""Return CommitDB Python driver version."""
|
|
162
|
+
return __version__
|
|
163
|
+
|
|
164
|
+
@property
|
|
165
|
+
def current_database(self) -> str | None:
|
|
166
|
+
"""Return the current database."""
|
|
167
|
+
return self._current_database
|
|
168
|
+
|
|
169
|
+
def connect(self, *args, **kwargs) -> "Backend":
|
|
170
|
+
"""Connect to the database.
|
|
171
|
+
|
|
172
|
+
Creates a new backend instance and connects it.
|
|
173
|
+
"""
|
|
174
|
+
new = self.__class__()
|
|
175
|
+
new.do_connect(*args, **kwargs)
|
|
176
|
+
return new
|
|
177
|
+
|
|
178
|
+
def do_connect(
|
|
179
|
+
self,
|
|
180
|
+
host: str = "localhost",
|
|
181
|
+
port: int = 3306,
|
|
182
|
+
database: str | None = None,
|
|
183
|
+
use_ssl: bool = False,
|
|
184
|
+
ssl_verify: bool = True,
|
|
185
|
+
ssl_ca_cert: str | None = None,
|
|
186
|
+
jwt_token: str | None = None,
|
|
187
|
+
) -> None:
|
|
188
|
+
"""Connect to a CommitDB server.
|
|
189
|
+
|
|
190
|
+
Parameters
|
|
191
|
+
----------
|
|
192
|
+
host
|
|
193
|
+
Server hostname
|
|
194
|
+
port
|
|
195
|
+
Server port (default 3306)
|
|
196
|
+
database
|
|
197
|
+
Default database to use
|
|
198
|
+
use_ssl
|
|
199
|
+
Enable SSL/TLS encryption
|
|
200
|
+
ssl_verify
|
|
201
|
+
Verify server certificate
|
|
202
|
+
ssl_ca_cert
|
|
203
|
+
Path to CA certificate file
|
|
204
|
+
jwt_token
|
|
205
|
+
JWT token for authentication
|
|
206
|
+
"""
|
|
207
|
+
self._client = CommitDBClient(
|
|
208
|
+
host=host,
|
|
209
|
+
port=port,
|
|
210
|
+
use_ssl=use_ssl,
|
|
211
|
+
ssl_verify=ssl_verify,
|
|
212
|
+
ssl_ca_cert=ssl_ca_cert,
|
|
213
|
+
jwt_token=jwt_token,
|
|
214
|
+
)
|
|
215
|
+
self._client.connect()
|
|
216
|
+
self._current_database = database
|
|
217
|
+
|
|
218
|
+
def disconnect(self) -> None:
|
|
219
|
+
"""Close the connection."""
|
|
220
|
+
if self._client:
|
|
221
|
+
self._client.close()
|
|
222
|
+
self._client = None
|
|
223
|
+
|
|
224
|
+
def _ensure_connected(self) -> CommitDBClient:
|
|
225
|
+
"""Ensure we have a valid connection."""
|
|
226
|
+
if self._client is None:
|
|
227
|
+
raise CommitDBError("Not connected. Call connect() first.")
|
|
228
|
+
return self._client
|
|
229
|
+
|
|
230
|
+
def list_databases(self, *, like: str | None = None) -> list[str]:
|
|
231
|
+
"""List all databases."""
|
|
232
|
+
client = self._ensure_connected()
|
|
233
|
+
result = client.query("SHOW DATABASES")
|
|
234
|
+
databases = [row[0] for row in result.data]
|
|
235
|
+
return self._filter_with_like(databases, like)
|
|
236
|
+
|
|
237
|
+
def list_tables(
|
|
238
|
+
self, *, like: str | None = None, database: str | None = None
|
|
239
|
+
) -> list[str]:
|
|
240
|
+
"""List tables in a database."""
|
|
241
|
+
client = self._ensure_connected()
|
|
242
|
+
db = database or self._current_database
|
|
243
|
+
if not db:
|
|
244
|
+
raise CommitDBError("No database specified. Use database parameter or set current_database.")
|
|
245
|
+
|
|
246
|
+
result = client.query(f"SHOW TABLES IN {db}")
|
|
247
|
+
tables = [row[0] for row in result.data]
|
|
248
|
+
return self._filter_with_like(tables, like)
|
|
249
|
+
|
|
250
|
+
def _parse_type(self, type_str: str) -> dt.DataType:
|
|
251
|
+
"""Parse a CommitDB type string into an Ibis DataType."""
|
|
252
|
+
type_upper = type_str.upper().strip()
|
|
253
|
+
|
|
254
|
+
# Handle PRIMARY KEY suffix
|
|
255
|
+
if "PRIMARY KEY" in type_upper:
|
|
256
|
+
type_upper = type_upper.replace("PRIMARY KEY", "").strip()
|
|
257
|
+
|
|
258
|
+
type_class = COMMITDB_TYPE_MAP.get(type_upper, dt.String)
|
|
259
|
+
return type_class(nullable=True)
|
|
260
|
+
|
|
261
|
+
def get_schema(
|
|
262
|
+
self,
|
|
263
|
+
table_name: str,
|
|
264
|
+
*,
|
|
265
|
+
catalog: str | None = None,
|
|
266
|
+
database: str | None = None,
|
|
267
|
+
) -> sch.Schema:
|
|
268
|
+
"""Get the schema of a table."""
|
|
269
|
+
client = self._ensure_connected()
|
|
270
|
+
|
|
271
|
+
# Handle database.table format
|
|
272
|
+
if "." in table_name:
|
|
273
|
+
db, tbl = table_name.split(".", 1)
|
|
274
|
+
else:
|
|
275
|
+
db = database or self._current_database
|
|
276
|
+
tbl = table_name
|
|
277
|
+
if not db:
|
|
278
|
+
raise CommitDBError("No database specified.")
|
|
279
|
+
|
|
280
|
+
# Get table structure using DESCRIBE
|
|
281
|
+
result = client.query(f"DESCRIBE {db}.{tbl}")
|
|
282
|
+
|
|
283
|
+
# Parse columns: DESCRIBE returns (name, type) tuples
|
|
284
|
+
fields = {}
|
|
285
|
+
for row in result.data:
|
|
286
|
+
col_name = row[0]
|
|
287
|
+
col_type = row[1] if len(row) > 1 else "STRING"
|
|
288
|
+
fields[col_name] = self._parse_type(col_type)
|
|
289
|
+
|
|
290
|
+
return sch.Schema(fields)
|
|
291
|
+
|
|
292
|
+
def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any:
|
|
293
|
+
"""Execute raw SQL."""
|
|
294
|
+
client = self._ensure_connected()
|
|
295
|
+
if not isinstance(query, str):
|
|
296
|
+
query = query.sql(dialect="sqlite")
|
|
297
|
+
return client.execute(query)
|
|
298
|
+
|
|
299
|
+
@contextlib.contextmanager
|
|
300
|
+
def _safe_raw_sql(self, query: str | sg.Expression, **kwargs):
|
|
301
|
+
"""Execute SQL and yield the result."""
|
|
302
|
+
result = self.raw_sql(query, **kwargs)
|
|
303
|
+
try:
|
|
304
|
+
yield result
|
|
305
|
+
finally:
|
|
306
|
+
pass # No cursor to close
|
|
307
|
+
|
|
308
|
+
def execute(
|
|
309
|
+
self,
|
|
310
|
+
expr: ir.Expr,
|
|
311
|
+
params: dict | None = None,
|
|
312
|
+
limit: str | None = "default",
|
|
313
|
+
**kwargs: Any,
|
|
314
|
+
) -> pd.DataFrame:
|
|
315
|
+
"""Execute an Ibis expression and return a pandas DataFrame."""
|
|
316
|
+
import pandas as pd
|
|
317
|
+
|
|
318
|
+
# Compile expression to SQL
|
|
319
|
+
sql = self.compile(expr, params=params, limit=limit)
|
|
320
|
+
|
|
321
|
+
# Execute
|
|
322
|
+
client = self._ensure_connected()
|
|
323
|
+
result = client.query(sql)
|
|
324
|
+
|
|
325
|
+
# Convert to DataFrame
|
|
326
|
+
df = pd.DataFrame(result.data, columns=result.columns)
|
|
327
|
+
|
|
328
|
+
# Apply type conversions if needed
|
|
329
|
+
schema = expr.schema() if hasattr(expr, "schema") else None
|
|
330
|
+
if schema:
|
|
331
|
+
for col, dtype in schema.items():
|
|
332
|
+
if col in df.columns:
|
|
333
|
+
if isinstance(dtype, dt.Int64):
|
|
334
|
+
df[col] = pd.to_numeric(df[col], errors="coerce").astype("Int64")
|
|
335
|
+
elif isinstance(dtype, dt.Float64):
|
|
336
|
+
df[col] = pd.to_numeric(df[col], errors="coerce")
|
|
337
|
+
elif isinstance(dtype, dt.Boolean):
|
|
338
|
+
df[col] = df[col].map({"true": True, "false": False, "1": True, "0": False})
|
|
339
|
+
|
|
340
|
+
return df
|
|
341
|
+
|
|
342
|
+
def create_table(
|
|
343
|
+
self,
|
|
344
|
+
name: str,
|
|
345
|
+
obj: ir.Table | pd.DataFrame | None = None,
|
|
346
|
+
*,
|
|
347
|
+
schema: sch.Schema | None = None,
|
|
348
|
+
database: str | None = None,
|
|
349
|
+
temp: bool = False,
|
|
350
|
+
overwrite: bool = False,
|
|
351
|
+
) -> ir.Table:
|
|
352
|
+
"""Create a table."""
|
|
353
|
+
client = self._ensure_connected()
|
|
354
|
+
db = database or self._current_database
|
|
355
|
+
if not db:
|
|
356
|
+
raise CommitDBError("No database specified.")
|
|
357
|
+
|
|
358
|
+
full_name = f"{db}.{name}"
|
|
359
|
+
|
|
360
|
+
if overwrite:
|
|
361
|
+
client.execute(f"DROP TABLE IF EXISTS {full_name}")
|
|
362
|
+
|
|
363
|
+
if schema is not None:
|
|
364
|
+
# Create from schema
|
|
365
|
+
cols = ", ".join(
|
|
366
|
+
f"{col} {self._ibis_type_to_commitdb(dtype)}"
|
|
367
|
+
for col, dtype in schema.items()
|
|
368
|
+
)
|
|
369
|
+
client.execute(f"CREATE TABLE {full_name} ({cols})")
|
|
370
|
+
|
|
371
|
+
if obj is not None:
|
|
372
|
+
import pandas as pd
|
|
373
|
+
if isinstance(obj, pd.DataFrame):
|
|
374
|
+
# Insert data from DataFrame
|
|
375
|
+
self._insert_dataframe(full_name, obj)
|
|
376
|
+
|
|
377
|
+
return self.table(name, database=db)
|
|
378
|
+
|
|
379
|
+
def _ibis_type_to_commitdb(self, dtype: dt.DataType) -> str:
|
|
380
|
+
"""Convert Ibis DataType to CommitDB type string."""
|
|
381
|
+
if isinstance(dtype, (dt.Int8, dt.Int16, dt.Int32, dt.Int64,
|
|
382
|
+
dt.UInt8, dt.UInt16, dt.UInt32, dt.UInt64)):
|
|
383
|
+
return "INT"
|
|
384
|
+
elif isinstance(dtype, (dt.Float32, dt.Float64)):
|
|
385
|
+
return "FLOAT"
|
|
386
|
+
elif isinstance(dtype, dt.String):
|
|
387
|
+
return "STRING"
|
|
388
|
+
elif isinstance(dtype, dt.Boolean):
|
|
389
|
+
return "BOOL"
|
|
390
|
+
elif isinstance(dtype, dt.Date):
|
|
391
|
+
return "DATE"
|
|
392
|
+
elif isinstance(dtype, dt.Timestamp):
|
|
393
|
+
return "TIMESTAMP"
|
|
394
|
+
elif isinstance(dtype, dt.JSON):
|
|
395
|
+
return "JSON"
|
|
396
|
+
else:
|
|
397
|
+
return "STRING"
|
|
398
|
+
|
|
399
|
+
def _insert_dataframe(self, table: str, df: pd.DataFrame) -> None:
|
|
400
|
+
"""Insert a pandas DataFrame into a table."""
|
|
401
|
+
client = self._ensure_connected()
|
|
402
|
+
|
|
403
|
+
if df.empty:
|
|
404
|
+
return
|
|
405
|
+
|
|
406
|
+
cols = ", ".join(df.columns)
|
|
407
|
+
|
|
408
|
+
# Build multi-value INSERT
|
|
409
|
+
values_list = []
|
|
410
|
+
for _, row in df.iterrows():
|
|
411
|
+
vals = []
|
|
412
|
+
for v in row:
|
|
413
|
+
if v is None or (isinstance(v, float) and pd.isna(v)):
|
|
414
|
+
vals.append("NULL")
|
|
415
|
+
elif isinstance(v, str):
|
|
416
|
+
# Escape single quotes
|
|
417
|
+
escaped = v.replace("'", "''")
|
|
418
|
+
vals.append(f"'{escaped}'")
|
|
419
|
+
elif isinstance(v, bool):
|
|
420
|
+
vals.append("1" if v else "0")
|
|
421
|
+
else:
|
|
422
|
+
vals.append(str(v))
|
|
423
|
+
values_list.append(f"({', '.join(vals)})")
|
|
424
|
+
|
|
425
|
+
# Insert in batches of 100
|
|
426
|
+
batch_size = 100
|
|
427
|
+
for i in range(0, len(values_list), batch_size):
|
|
428
|
+
batch = values_list[i:i + batch_size]
|
|
429
|
+
sql = f"INSERT INTO {table} ({cols}) VALUES {', '.join(batch)}"
|
|
430
|
+
client.execute(sql)
|
|
431
|
+
|
|
432
|
+
def insert(
|
|
433
|
+
self,
|
|
434
|
+
table_name: str,
|
|
435
|
+
obj: pd.DataFrame | ir.Table,
|
|
436
|
+
*,
|
|
437
|
+
database: str | None = None,
|
|
438
|
+
) -> None:
|
|
439
|
+
"""Insert data into a table."""
|
|
440
|
+
import pandas as pd
|
|
441
|
+
|
|
442
|
+
db = database or self._current_database
|
|
443
|
+
if not db:
|
|
444
|
+
raise CommitDBError("No database specified.")
|
|
445
|
+
|
|
446
|
+
full_name = f"{db}.{table_name}"
|
|
447
|
+
|
|
448
|
+
if isinstance(obj, pd.DataFrame):
|
|
449
|
+
self._insert_dataframe(full_name, obj)
|
|
450
|
+
else:
|
|
451
|
+
# Ibis Table - execute and insert
|
|
452
|
+
df = obj.execute()
|
|
453
|
+
self._insert_dataframe(full_name, df)
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def connect(
|
|
457
|
+
host: str = "localhost",
|
|
458
|
+
port: int = 3306,
|
|
459
|
+
database: str | None = None,
|
|
460
|
+
**kwargs,
|
|
461
|
+
) -> Backend:
|
|
462
|
+
"""Connect to a CommitDB server.
|
|
463
|
+
|
|
464
|
+
Parameters
|
|
465
|
+
----------
|
|
466
|
+
host
|
|
467
|
+
Server hostname
|
|
468
|
+
port
|
|
469
|
+
Server port (default 3306)
|
|
470
|
+
database
|
|
471
|
+
Default database to use
|
|
472
|
+
**kwargs
|
|
473
|
+
Additional connection parameters (use_ssl, jwt_token, etc.)
|
|
474
|
+
|
|
475
|
+
Returns
|
|
476
|
+
-------
|
|
477
|
+
Backend
|
|
478
|
+
Connected Ibis backend
|
|
479
|
+
|
|
480
|
+
Examples
|
|
481
|
+
--------
|
|
482
|
+
>>> import ibis
|
|
483
|
+
>>> con = ibis.commitdb.connect('localhost', 3306, database='mydb')
|
|
484
|
+
>>> users = con.table('users')
|
|
485
|
+
>>> users.filter(users.age > 30).execute()
|
|
486
|
+
"""
|
|
487
|
+
backend = Backend()
|
|
488
|
+
backend.do_connect(host=host, port=port, database=database, **kwargs)
|
|
489
|
+
return backend
|
|
490
|
+
|
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: commitdb
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: Python driver for CommitDB SQL Server
|
|
5
5
|
Author: CommitDB Contributors
|
|
6
6
|
License: Apache-2.0
|
|
@@ -19,6 +19,9 @@ Requires-Python: >=3.10
|
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
Provides-Extra: dev
|
|
21
21
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
22
|
+
Provides-Extra: ibis
|
|
23
|
+
Requires-Dist: ibis-framework>=9.0; extra == "ibis"
|
|
24
|
+
Requires-Dist: pandas>=2.0; extra == "ibis"
|
|
22
25
|
|
|
23
26
|
# CommitDB Python Driver
|
|
24
27
|
|
|
@@ -65,6 +68,31 @@ with CommitDBLocal('/path/to/data') as db: # File-based (persistent)
|
|
|
65
68
|
db.execute('CREATE DATABASE mydb')
|
|
66
69
|
```
|
|
67
70
|
|
|
71
|
+
### Ibis Mode (pandas DataFrame support)
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install commitdb[ibis]
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import ibis
|
|
79
|
+
|
|
80
|
+
con = ibis.commitdb.connect('localhost', 3306, database='mydb')
|
|
81
|
+
|
|
82
|
+
# Or use URL-based connection:
|
|
83
|
+
con = ibis.connect('commitdb://localhost:3306/mydb')
|
|
84
|
+
|
|
85
|
+
# Query with Ibis expressions
|
|
86
|
+
users = con.table('users')
|
|
87
|
+
result = users.filter(users.age > 30).select('name', 'city').execute() # → pandas DataFrame
|
|
88
|
+
print(result)
|
|
89
|
+
|
|
90
|
+
# Insert from DataFrame
|
|
91
|
+
import pandas as pd
|
|
92
|
+
df = pd.DataFrame({'id': [1, 2], 'name': ['Alice', 'Bob']})
|
|
93
|
+
con.insert('users', df)
|
|
94
|
+
```
|
|
95
|
+
|
|
68
96
|
---
|
|
69
97
|
|
|
70
98
|
## API Reference
|
|
@@ -3,11 +3,14 @@ pyproject.toml
|
|
|
3
3
|
commitdb/__init__.py
|
|
4
4
|
commitdb/binding.py
|
|
5
5
|
commitdb/client.py
|
|
6
|
+
commitdb/ibis_backend.py
|
|
6
7
|
commitdb.egg-info/PKG-INFO
|
|
7
8
|
commitdb.egg-info/SOURCES.txt
|
|
8
9
|
commitdb.egg-info/dependency_links.txt
|
|
10
|
+
commitdb.egg-info/entry_points.txt
|
|
9
11
|
commitdb.egg-info/requires.txt
|
|
10
12
|
commitdb.egg-info/top_level.txt
|
|
11
13
|
commitdb/lib/libcommitdb-linux-amd64.so
|
|
12
14
|
tests/__init__.py
|
|
13
|
-
tests/test_client.py
|
|
15
|
+
tests/test_client.py
|
|
16
|
+
tests/test_ibis.py
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "commitdb"
|
|
7
|
-
version = "2.1.0"
|
|
7
|
+
version = "2.2.0"
|
|
8
8
|
description = "Python driver for CommitDB SQL Server"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -36,3 +36,7 @@ commitdb = ["lib/*.so", "lib/*.dylib", "lib/*.dll"]
|
|
|
36
36
|
|
|
37
37
|
[project.optional-dependencies]
|
|
38
38
|
dev = ["pytest>=7.0"]
|
|
39
|
+
ibis = ["ibis-framework>=9.0", "pandas>=2.0"]
|
|
40
|
+
|
|
41
|
+
[project.entry-points."ibis.backends"]
|
|
42
|
+
commitdb = "commitdb.ibis_backend"
|
|
@@ -18,7 +18,7 @@ class TestQueryResult:
|
|
|
18
18
|
columns=['id', 'name'],
|
|
19
19
|
data=[['1', 'Alice'], ['2', 'Bob']],
|
|
20
20
|
records_read=2,
|
|
21
|
-
|
|
21
|
+
execution_time_ms=1.0
|
|
22
22
|
)
|
|
23
23
|
|
|
24
24
|
rows = list(result)
|
|
@@ -32,7 +32,7 @@ class TestQueryResult:
|
|
|
32
32
|
columns=['id'],
|
|
33
33
|
data=[['1'], ['2'], ['3']],
|
|
34
34
|
records_read=3,
|
|
35
|
-
|
|
35
|
+
execution_time_ms=1.0
|
|
36
36
|
)
|
|
37
37
|
assert len(result) == 3
|
|
38
38
|
|
|
@@ -41,7 +41,7 @@ class TestQueryResult:
|
|
|
41
41
|
columns=['id', 'name'],
|
|
42
42
|
data=[['1', 'Alice'], ['2', 'Bob']],
|
|
43
43
|
records_read=2,
|
|
44
|
-
|
|
44
|
+
execution_time_ms=1.0
|
|
45
45
|
)
|
|
46
46
|
assert result[0] == {'id': '1', 'name': 'Alice'}
|
|
47
47
|
assert result[1] == {'id': '2', 'name': 'Bob'}
|
|
@@ -61,7 +61,7 @@ class TestCommitResult:
|
|
|
61
61
|
def test_defaults(self):
|
|
62
62
|
result = CommitResult()
|
|
63
63
|
assert result.affected_rows == 0
|
|
64
|
-
assert result.
|
|
64
|
+
assert result.execution_time_ms == 0.0
|
|
65
65
|
|
|
66
66
|
|
|
67
67
|
class TestCommitDBUnit:
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Tests for the Ibis backend."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
# Skip all tests if ibis is not installed
|
|
6
|
+
ibis = pytest.importorskip("ibis")
|
|
7
|
+
pd = pytest.importorskip("pandas")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestIbisBackendUnit:
|
|
11
|
+
"""Unit tests for ibis backend that don't require a server."""
|
|
12
|
+
|
|
13
|
+
def test_import_backend(self):
|
|
14
|
+
"""Test that the backend can be imported."""
|
|
15
|
+
from commitdb import ibis_backend
|
|
16
|
+
assert hasattr(ibis_backend, "Backend")
|
|
17
|
+
|
|
18
|
+
def test_backend_registered(self):
|
|
19
|
+
"""Test that the backend is registered via entry points."""
|
|
20
|
+
from importlib.metadata import entry_points
|
|
21
|
+
|
|
22
|
+
# Check entry points registration
|
|
23
|
+
eps = entry_points(group='ibis.backends')
|
|
24
|
+
names = [ep.name for ep in eps]
|
|
25
|
+
assert 'commitdb' in names
|
|
26
|
+
|
|
27
|
+
def test_type_mapping(self):
|
|
28
|
+
"""Test CommitDB to Ibis type mapping."""
|
|
29
|
+
from commitdb.ibis_backend import COMMITDB_TYPE_MAP
|
|
30
|
+
import ibis.expr.datatypes as dt
|
|
31
|
+
|
|
32
|
+
assert COMMITDB_TYPE_MAP["INT"] == dt.Int64
|
|
33
|
+
assert COMMITDB_TYPE_MAP["STRING"] == dt.String
|
|
34
|
+
assert COMMITDB_TYPE_MAP["FLOAT"] == dt.Float64
|
|
35
|
+
assert COMMITDB_TYPE_MAP["BOOL"] == dt.Boolean
|
|
36
|
+
|
|
37
|
+
def test_backend_instantiation(self):
|
|
38
|
+
"""Test that the backend can be instantiated."""
|
|
39
|
+
from commitdb.ibis_backend import Backend
|
|
40
|
+
|
|
41
|
+
backend = Backend()
|
|
42
|
+
assert backend.name == "commitdb"
|
|
43
|
+
assert backend._client is None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@pytest.mark.integration
|
|
47
|
+
class TestIbisBackendIntegration:
|
|
48
|
+
"""Integration tests that require a running CommitDB server.
|
|
49
|
+
|
|
50
|
+
Run with: pytest -m integration tests/test_ibis.py
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
@pytest.fixture
|
|
54
|
+
def connection(self):
|
|
55
|
+
"""Create a connection to the test server."""
|
|
56
|
+
from commitdb.ibis_backend import Backend
|
|
57
|
+
|
|
58
|
+
backend = Backend()
|
|
59
|
+
try:
|
|
60
|
+
backend.do_connect(host="localhost", port=3306, database="test")
|
|
61
|
+
yield backend
|
|
62
|
+
finally:
|
|
63
|
+
backend.disconnect()
|
|
64
|
+
|
|
65
|
+
def test_connect(self, connection):
|
|
66
|
+
"""Test connecting to the server."""
|
|
67
|
+
assert connection._client is not None
|
|
68
|
+
|
|
69
|
+
def test_list_databases(self, connection):
|
|
70
|
+
"""Test listing databases."""
|
|
71
|
+
databases = connection.list_databases()
|
|
72
|
+
assert isinstance(databases, list)
|
|
73
|
+
|
|
74
|
+
def test_query_to_dataframe(self, connection):
|
|
75
|
+
"""Test that queries return pandas DataFrames."""
|
|
76
|
+
# This requires a table to exist
|
|
77
|
+
# Skipped if no tables exist
|
|
78
|
+
databases = connection.list_databases()
|
|
79
|
+
if not databases:
|
|
80
|
+
pytest.skip("No databases available")
|
|
81
|
+
|
|
82
|
+
tables = connection.list_tables(database=databases[0])
|
|
83
|
+
if not tables:
|
|
84
|
+
pytest.skip("No tables available")
|
|
85
|
+
|
|
86
|
+
table = connection.table(f"{databases[0]}.{tables[0]}")
|
|
87
|
+
result = table.limit(5).execute()
|
|
88
|
+
assert isinstance(result, pd.DataFrame)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|