arcade-postgres 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
File without changes
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from typing import Any, ClassVar
|
|
2
|
+
from urllib.parse import urlparse
|
|
3
|
+
|
|
4
|
+
from arcade_tdk.errors import RetryableToolError
|
|
5
|
+
from sqlalchemy import text
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
|
|
7
|
+
|
|
8
|
+
# Hard cap on how many rows/entries any query or schema helper returns.
MAX_ROWS_RETURNED = 1000
# Trivial probe statement used to validate that a pooled connection is alive.
TEST_QUERY = "SELECT 1"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DatabaseEngine:
    """Cache of SQLAlchemy async engines, one per postgres connection string.

    ``get_engine`` hands out a lightweight async context manager around a
    validated, cached :class:`AsyncEngine`.
    """

    # Engines keyed by their asyncpg connection string. Shared across all
    # callers for the lifetime of the process (see cleanup()/clear_cache()).
    _engines: ClassVar[dict[str, AsyncEngine]] = {}

    @classmethod
    async def _validate_engine(cls, key: str) -> AsyncEngine:
        """Run TEST_QUERY on the cached engine for *key*; return it on success."""
        async with cls._engines[key].connect() as connection:
            await connection.execute(text(TEST_QUERY))
            return cls._engines[key]

    @classmethod
    async def get_instance(cls, connection_string: str) -> AsyncEngine:
        """Return a validated AsyncEngine for *connection_string*.

        The engine is created once per (rewritten) connection string and
        cached. The pooled connection is probed with a trivial query; on
        failure the pool is disposed and probed once more before giving up.

        Raises:
            RetryableToolError: if the connection cannot be validated after
                one dispose-and-retry cycle.
        """
        parsed_url = urlparse(connection_string)

        # TODO: something strange with sslmode= and friends
        # query_params = parse_qs(parsed_url.query)
        # query_params = {
        #     k: v[0] for k, v in query_params.items()
        # }  # assume one value allowed for each query param

        # Force the asyncpg driver; query params are currently dropped (see
        # the TODO above).
        scheme = parsed_url.scheme.replace("postgresql", "postgresql+asyncpg")
        key = f"{scheme}://{parsed_url.netloc}{parsed_url.path}"
        if key not in cls._engines:
            cls._engines[key] = create_async_engine(key)

        # Probe the pooled engine; a stale pool is disposed and retried once.
        try:
            return await cls._validate_engine(key)
        except Exception:
            await cls._engines[key].dispose()
            try:
                return await cls._validate_engine(key)
            except Exception as e:
                raise RetryableToolError(
                    f"Connection failed: {e}",
                    developer_message="Connection to postgres failed.",
                    additional_prompt_content="Check the connection string and try again.",
                ) from e

    @classmethod
    async def get_engine(cls, connection_string: str) -> Any:
        """Return an async context manager yielding a validated AsyncEngine."""
        engine = await cls.get_instance(connection_string)

        class ConnectionContextManager:
            """Yields the shared engine; exit is a no-op because the engine
            itself is cached and must outlive this context."""

            def __init__(self, engine: AsyncEngine) -> None:
                self.engine = engine

            async def __aenter__(self) -> AsyncEngine:
                return self.engine

            async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
                # Connection cleanup is handled by the async context manager
                pass

        return ConnectionContextManager(engine)

    @classmethod
    async def cleanup(cls) -> None:
        """Clean up all cached engines. Call this when shutting down."""
        for engine in cls._engines.values():
            await engine.dispose()
        cls._engines.clear()

    @classmethod
    def clear_cache(cls) -> None:
        """Clear the engine cache without disposing engines. Use with caution."""
        cls._engines.clear()

    @classmethod
    def sanitize_query(cls, query: str) -> str:
        """
        Sanitize a query to not break our read-only session.
        THIS IS REALLY UNSAFE AND SHOULD NOT BE USED IN PRODUCTION. USE A DATABASE CONNECTION WITH A READ-ONLY USER AND PREPARE STATEMENTS.
        There are also valid reasons for the ";" character, and this prevents that.
        """

        # Ignore empty fragments so a single trailing ";" is not mistaken for
        # a second statement.
        statements = [part for part in query.split(";") if part.strip()]
        if len(statements) > 1:
            raise RetryableToolError(
                "Multiple statements are not allowed in a single query.",
                developer_message="Multiple statements are not allowed in a single query.",
                additional_prompt_content="Split your query into multiple queries and try again.",
            )

        # split() with no argument tolerates leading whitespace/newlines/tabs,
        # which split(" ") would turn into an empty first "word".
        words = statements[0].split() if statements else []
        if not words or words[0].upper() != "SELECT":
            raise RetryableToolError(
                "Only SELECT queries are allowed.",
                developer_message="Only SELECT queries are allowed.",
                additional_prompt_content="Use the <DiscoverTables> and <GetTableSchema> tools to discover the tables and try again.",
            )

        return query
|
|
File without changes
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from typing import Annotated, Any
|
|
2
|
+
|
|
3
|
+
from arcade_tdk import ToolContext, tool
|
|
4
|
+
from arcade_tdk.errors import RetryableToolError
|
|
5
|
+
from sqlalchemy import inspect, text
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncEngine
|
|
7
|
+
|
|
8
|
+
from ..database_engine import MAX_ROWS_RETURNED, DatabaseEngine
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@tool(requires_secrets=["DATABASE_CONNECTION_STRING"])
async def discover_schemas(
    context: ToolContext,
) -> list[str]:
    """Discover all the schemas in the postgres database."""
    # Resolve the secret once, then borrow the cached engine for the lookup.
    connection_string = context.get_secret("DATABASE_CONNECTION_STRING")
    async with await DatabaseEngine.get_engine(connection_string) as engine:
        return await _get_schemas(engine)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@tool(requires_secrets=["DATABASE_CONNECTION_STRING"])
async def discover_tables(
    context: ToolContext,
    schema_name: Annotated[
        str, "The database schema to discover tables in (default value: 'public')"
    ] = "public",
) -> list[str]:
    """Discover all the tables in the postgres database when the list of tables is not known.

    THIS TOOL SHOULD ALWAYS BE USED BEFORE ANY OTHER TOOL THAT REQUIRES A TABLE NAME.
    """
    # Resolve the secret once, then borrow the cached engine for the lookup.
    connection_string = context.get_secret("DATABASE_CONNECTION_STRING")
    async with await DatabaseEngine.get_engine(connection_string) as engine:
        return await _get_tables(engine, schema_name)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@tool(requires_secrets=["DATABASE_CONNECTION_STRING"])
async def get_table_schema(
    context: ToolContext,
    schema_name: Annotated[str, "The database schema to get the table schema of"],
    table_name: Annotated[str, "The table to get the schema of"],
) -> list[str]:
    """
    Get the schema/structure of a postgres table in the postgres database when the schema is not known, and the name of the table is provided.

    THIS TOOL SHOULD ALWAYS BE USED BEFORE EXECUTING ANY QUERY. ALL TABLES IN THE QUERY MUST BE DISCOVERED FIRST USING THE <DiscoverTables> TOOL.
    """
    # Resolve the secret once, then describe the table via the cached engine.
    connection_string = context.get_secret("DATABASE_CONNECTION_STRING")
    async with await DatabaseEngine.get_engine(connection_string) as engine:
        columns = await _get_table_schema(engine, schema_name, table_name)
    return columns
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@tool(requires_secrets=["DATABASE_CONNECTION_STRING"])
async def execute_query(
    context: ToolContext,
    query: Annotated[str, "The postgres SQL query to execute. Only SELECT queries are allowed."],
) -> list[str]:
    """
    You have a connection to a postgres database.
    Execute a query and return the results against the postgres database.

    ONLY USE THIS TOOL IF YOU HAVE ALREADY LOADED THE SCHEMA OF THE TABLES YOU NEED TO QUERY. USE THE <GetTableSchema> TOOL TO LOAD THE SCHEMA IF NOT ALREADY KNOWN.

    When running queries, follow these rules which will help avoid errors:
    * Always use case-insensitive queries to match strings in the query.
    * Always trim strings in the query.
    * Prefer LIKE queries over direct string matches or regex queries.
    * Only join on columns that are indexed or the primary key. Do not join on arbitrary columns.

    Only SELECT queries are allowed. Do not use INSERT, UPDATE, DELETE, or other DML statements. This tool will reject them.

    Unless otherwise specified, ensure that query has a LIMIT of 100 for all results. This tool will enforce that no more than 1000 rows are returned at maximum.
    """
    # Resolve the secret once, then run the query through the cached engine.
    connection_string = context.get_secret("DATABASE_CONNECTION_STRING")
    async with await DatabaseEngine.get_engine(connection_string) as engine:
        try:
            return await _execute_query(engine, query)
        except Exception as e:
            # Any failure (bad SQL, unknown table, rejected statement) is
            # surfaced as retryable so the caller can fix the query.
            raise RetryableToolError(
                f"Query failed: {e}",
                developer_message=f"Query '{query}' failed.",
                additional_prompt_content="Load the database schema <GetTableSchema> or use the <DiscoverTables> tool to discover the tables and try again.",
                retry_after_ms=10,
            ) from e
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
async def _get_schemas(engine: AsyncEngine) -> list[str]:
    """Return every schema name in the database except ``information_schema``."""
    async with engine.connect() as conn:
        all_names: list[str] = await conn.run_sync(
            lambda sync_conn: list(inspect(sync_conn).get_schema_names())
        )
    return [name for name in all_names if name != "information_schema"]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
async def _get_tables(engine: AsyncEngine, schema_name: str) -> list[str]:
    """Return all table names in *schema_name*.

    Returns an empty list when the schema does not exist, matching the
    previous behavior of skipping non-matching schemas.
    """
    async with engine.connect() as conn:

        def list_tables(sync_conn: Any) -> list[str]:
            # Single inspector round-trip: check the schema exists, then list
            # its tables, instead of iterating every schema and comparing.
            inspector = inspect(sync_conn)
            if schema_name not in inspector.get_schema_names():
                return []
            return list(inspector.get_table_names(schema=schema_name))

        tables: list[str] = await conn.run_sync(list_tables)
    return tables
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
async def _get_table_schema(engine: AsyncEngine, schema_name: str, table_name: str) -> list[str]:
    """Describe each column of ``schema_name.table_name`` as a display string.

    Each entry looks like ``"name: int (PRIMARY KEY) (INDEXED)"``. The list
    is capped at MAX_ROWS_RETURNED.
    """
    async with engine.connect() as connection:

        def get_columns(sync_conn: Any, t: str = table_name, s: str = schema_name) -> list[Any]:
            return list(inspect(sync_conn).get_columns(t, s))

        columns_table = await connection.run_sync(get_columns)

        # Get primary key information
        pk_constraint = await connection.run_sync(
            lambda sync_conn: inspect(sync_conn).get_pk_constraint(table_name, schema_name)
        )
        primary_keys = set(pk_constraint.get("constrained_columns", []))

        # Get index information
        indexes = await connection.run_sync(
            lambda sync_conn: inspect(sync_conn).get_indexes(table_name, schema_name)
        )
        indexed_columns = set()
        for index in indexes:
            indexed_columns.update(index.get("column_names", []))

        results = []
        for column in columns_table:
            column_name = column["name"]
            # python_type is not implemented for every SQLAlchemy type; fall
            # back to the SQL type's string form instead of crashing.
            try:
                column_type = column["type"].python_type.__name__
            except NotImplementedError:
                column_type = str(column["type"])

            # Build column description
            description = f"{column_name}: {column_type}"

            # Add primary key indicator
            if column_name in primary_keys:
                description += " (PRIMARY KEY)"

            # Add index indicator
            if column_name in indexed_columns:
                description += " (INDEXED)"

            results.append(description)

        return results[:MAX_ROWS_RETURNED]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
async def _execute_query(
    engine: AsyncEngine, query: str, params: dict[str, Any] | None = None
) -> list[str]:
    """Execute a sanitized query and return at most MAX_ROWS_RETURNED rows,
    each rendered with ``str()``.

    Raises:
        RetryableToolError: from ``sanitize_query`` for non-SELECT or
            multi-statement input.
    """
    async with engine.connect() as connection:
        result = await connection.execute(text(DatabaseEngine.sanitize_query(query)), params)
        # fetchmany enforces the row cap without materializing rows beyond it
        # (fetchall() would pull the entire result set first).
        rows = result.fetchmany(MAX_ROWS_RETURNED)
    return [str(row) for row in rows]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: arcade_postgres
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Tools to query and explore a postgres database
|
|
5
|
+
Author-email: evantahler <support@arcade.dev>
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: arcade-tdk<3.0.0,>=2.0.0
|
|
8
|
+
Requires-Dist: asyncpg>=0.30.0
|
|
9
|
+
Requires-Dist: greenlet>=3.2.3
|
|
10
|
+
Requires-Dist: psycopg2-binary>=2.9.10
|
|
11
|
+
Requires-Dist: pydantic>=2.11.7
|
|
12
|
+
Requires-Dist: sqlalchemy>=2.0.41
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: arcade-ai[evals]<3.0.0,>=2.0.0; extra == 'dev'
|
|
15
|
+
Requires-Dist: arcade-serve<3.0.0,>=2.0.0; extra == 'dev'
|
|
16
|
+
Requires-Dist: mypy<1.6.0,>=1.5.1; extra == 'dev'
|
|
17
|
+
Requires-Dist: pre-commit<3.5.0,>=3.4.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: pytest-asyncio<0.25.0,>=0.24.0; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest-cov<4.1.0,>=4.0.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: pytest-mock<3.12.0,>=3.11.1; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest<8.4.0,>=8.3.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: ruff<0.8.0,>=0.7.4; extra == 'dev'
|
|
23
|
+
Requires-Dist: tox<4.12.0,>=4.11.1; extra == 'dev'
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
arcade_postgres/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
arcade_postgres/database_engine.py,sha256=-HDhOk2Zgb-4F1QeYRCZdrmvv2JkaGcXFawE9tjn2nM,4088
|
|
3
|
+
arcade_postgres/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
arcade_postgres/tools/postgres.py,sha256=E4Z8Be08UbWI08iJynfS3VTM3vP7Cca9Ymz1Op7e_N4,7020
|
|
5
|
+
arcade_postgres-0.1.0.dist-info/METADATA,sha256=mWL4dI8vwmCvO0UvLo7EcPwEdeTbRFAHqNBofR5VmZY,968
|
|
6
|
+
arcade_postgres-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
7
|
+
arcade_postgres-0.1.0.dist-info/RECORD,,
|