hexdag 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
"""PostgreSQL with pgvector extension adapter for vector similarity search."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import asyncpg
|
|
6
|
+
|
|
7
|
+
from hexdag.core.logging import get_logger
|
|
8
|
+
|
|
9
|
+
# Module-level logger named after this module; PgVectorAdapter uses it to report
# pgvector extension checks and per-vector upsert failures.
logger = get_logger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PgVectorAdapter:
|
|
13
|
+
"""PostgreSQL adapter with pgvector extension support.
|
|
14
|
+
|
|
15
|
+
This adapter provides integration with PostgreSQL databases that have the
|
|
16
|
+
pgvector extension installed, enabling vector similarity search alongside
|
|
17
|
+
traditional SQL operations.
|
|
18
|
+
|
|
19
|
+
Secret Management
|
|
20
|
+
-----------------
|
|
21
|
+
Password resolution order:
|
|
22
|
+
1. Explicit parameter: PgVectorAdapter(password="...")
|
|
23
|
+
2. Environment variable: PGVECTOR_PASSWORD
|
|
24
|
+
3. Memory port (orchestrator): secret:PGVECTOR_PASSWORD
|
|
25
|
+
|
|
26
|
+
Requirements
|
|
27
|
+
------------
|
|
28
|
+
- PostgreSQL with pgvector extension installed
|
|
29
|
+
- asyncpg Python package
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
password: str, # ← Auto-resolved by @adapter decorator
|
|
35
|
+
host: str = "localhost",
|
|
36
|
+
port: int = 5432,
|
|
37
|
+
database: str = "postgres",
|
|
38
|
+
user: str = "postgres",
|
|
39
|
+
read_only: bool = False,
|
|
40
|
+
**kwargs: Any,
|
|
41
|
+
):
|
|
42
|
+
"""Initialize pgvector adapter.
|
|
43
|
+
|
|
44
|
+
Parameters
|
|
45
|
+
----------
|
|
46
|
+
password : str
|
|
47
|
+
PostgreSQL password (auto-resolved from PGVECTOR_PASSWORD env var)
|
|
48
|
+
host : str, default="localhost"
|
|
49
|
+
PostgreSQL host
|
|
50
|
+
port : int, default=5432
|
|
51
|
+
PostgreSQL port
|
|
52
|
+
database : str, default="postgres"
|
|
53
|
+
Database name
|
|
54
|
+
user : str, default="postgres"
|
|
55
|
+
PostgreSQL user
|
|
56
|
+
read_only : bool, default=False
|
|
57
|
+
If True, prevents all write operations (INSERT, UPDATE, DELETE)
|
|
58
|
+
"""
|
|
59
|
+
self.host = host
|
|
60
|
+
self.port = port
|
|
61
|
+
self.database = database
|
|
62
|
+
self.user = user
|
|
63
|
+
self.password = password
|
|
64
|
+
self.read_only = read_only
|
|
65
|
+
self._extra_kwargs = kwargs
|
|
66
|
+
|
|
67
|
+
self.pool: asyncpg.Pool | None = None
|
|
68
|
+
|
|
69
|
+
def _check_read_only(self, operation: str) -> None:
|
|
70
|
+
"""Check if adapter is in read-only mode and raise if attempting write.
|
|
71
|
+
|
|
72
|
+
Parameters
|
|
73
|
+
----------
|
|
74
|
+
operation : str
|
|
75
|
+
Name of the operation being attempted (for error message)
|
|
76
|
+
|
|
77
|
+
Raises
|
|
78
|
+
------
|
|
79
|
+
RuntimeError
|
|
80
|
+
If adapter is in read-only mode
|
|
81
|
+
"""
|
|
82
|
+
if self.read_only:
|
|
83
|
+
raise RuntimeError(
|
|
84
|
+
f"Cannot perform {operation}: adapter is in read-only mode. "
|
|
85
|
+
f"Set read_only=False to enable write operations."
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
async def connect(self) -> None:
|
|
89
|
+
"""Establish database connection pool."""
|
|
90
|
+
self.pool = await asyncpg.create_pool(
|
|
91
|
+
host=self.host,
|
|
92
|
+
port=self.port,
|
|
93
|
+
database=self.database,
|
|
94
|
+
user=self.user,
|
|
95
|
+
password=self.password,
|
|
96
|
+
min_size=1,
|
|
97
|
+
max_size=10,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Verify pgvector extension is available
|
|
101
|
+
async with self.pool.acquire() as conn:
|
|
102
|
+
try:
|
|
103
|
+
if not self.read_only:
|
|
104
|
+
await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
|
|
105
|
+
logger.info("pgvector extension is available")
|
|
106
|
+
else:
|
|
107
|
+
# Just check if extension exists in read-only mode
|
|
108
|
+
result = await conn.fetchval(
|
|
109
|
+
"SELECT COUNT(*) FROM pg_extension WHERE extname = 'vector'"
|
|
110
|
+
)
|
|
111
|
+
if result:
|
|
112
|
+
logger.info("pgvector extension is available (read-only mode)")
|
|
113
|
+
else:
|
|
114
|
+
logger.warning("pgvector extension not found (read-only mode)")
|
|
115
|
+
except Exception as e:
|
|
116
|
+
logger.warning(f"Could not verify pgvector extension: {e}")
|
|
117
|
+
|
|
118
|
+
async def disconnect(self) -> None:
|
|
119
|
+
"""Close database connection pool."""
|
|
120
|
+
if self.pool:
|
|
121
|
+
await self.pool.close()
|
|
122
|
+
self.pool = None
|
|
123
|
+
|
|
124
|
+
async def aget_table_schemas(self) -> dict[str, dict[str, Any]]:
|
|
125
|
+
"""Get schema information for all tables.
|
|
126
|
+
|
|
127
|
+
Returns
|
|
128
|
+
-------
|
|
129
|
+
Dictionary mapping table names to schema information
|
|
130
|
+
"""
|
|
131
|
+
if not self.pool:
|
|
132
|
+
raise RuntimeError("Not connected to database")
|
|
133
|
+
|
|
134
|
+
async with self.pool.acquire() as conn:
|
|
135
|
+
# Get all tables
|
|
136
|
+
tables = await conn.fetch("""
|
|
137
|
+
SELECT table_name
|
|
138
|
+
FROM information_schema.tables
|
|
139
|
+
WHERE table_schema = 'public'
|
|
140
|
+
AND table_type = 'BASE TABLE'
|
|
141
|
+
""")
|
|
142
|
+
|
|
143
|
+
schemas = {}
|
|
144
|
+
for table_row in tables:
|
|
145
|
+
table_name = table_row["table_name"]
|
|
146
|
+
|
|
147
|
+
# Get columns
|
|
148
|
+
columns_query = await conn.fetch(
|
|
149
|
+
"""
|
|
150
|
+
SELECT column_name, data_type
|
|
151
|
+
FROM information_schema.columns
|
|
152
|
+
WHERE table_schema = 'public' AND table_name = $1
|
|
153
|
+
ORDER BY ordinal_position
|
|
154
|
+
""",
|
|
155
|
+
table_name,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
columns = {row["column_name"]: row["data_type"] for row in columns_query}
|
|
159
|
+
|
|
160
|
+
# Get primary keys
|
|
161
|
+
pk_query = await conn.fetch(
|
|
162
|
+
"""
|
|
163
|
+
SELECT a.attname
|
|
164
|
+
FROM pg_index i
|
|
165
|
+
JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
|
|
166
|
+
WHERE i.indrelid = $1::regclass AND i.indisprimary
|
|
167
|
+
""",
|
|
168
|
+
table_name,
|
|
169
|
+
)
|
|
170
|
+
primary_keys = [row["attname"] for row in pk_query]
|
|
171
|
+
|
|
172
|
+
# Get foreign keys
|
|
173
|
+
fk_query = await conn.fetch(
|
|
174
|
+
"""
|
|
175
|
+
SELECT
|
|
176
|
+
kcu.column_name,
|
|
177
|
+
ccu.table_name AS foreign_table_name,
|
|
178
|
+
ccu.column_name AS foreign_column_name
|
|
179
|
+
FROM information_schema.table_constraints AS tc
|
|
180
|
+
JOIN information_schema.key_column_usage AS kcu
|
|
181
|
+
ON tc.constraint_name = kcu.constraint_name
|
|
182
|
+
AND tc.table_schema = kcu.table_schema
|
|
183
|
+
JOIN information_schema.constraint_column_usage AS ccu
|
|
184
|
+
ON ccu.constraint_name = tc.constraint_name
|
|
185
|
+
AND ccu.table_schema = tc.table_schema
|
|
186
|
+
WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name = $1
|
|
187
|
+
""",
|
|
188
|
+
table_name,
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
foreign_keys = [
|
|
192
|
+
{
|
|
193
|
+
"from_column": row["column_name"],
|
|
194
|
+
"to_table": row["foreign_table_name"],
|
|
195
|
+
"to_column": row["foreign_column_name"],
|
|
196
|
+
}
|
|
197
|
+
for row in fk_query
|
|
198
|
+
]
|
|
199
|
+
|
|
200
|
+
schemas[table_name] = {
|
|
201
|
+
"table_name": table_name,
|
|
202
|
+
"columns": columns,
|
|
203
|
+
"primary_keys": primary_keys,
|
|
204
|
+
"foreign_keys": foreign_keys,
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
return schemas
|
|
208
|
+
|
|
209
|
+
async def aexecute_query(
|
|
210
|
+
self, query: str, params: dict[str, Any] | None = None
|
|
211
|
+
) -> list[dict[str, Any]]:
|
|
212
|
+
"""Execute a SQL query and return results.
|
|
213
|
+
|
|
214
|
+
Args
|
|
215
|
+
----
|
|
216
|
+
query: SQL query to execute
|
|
217
|
+
params: Optional query parameters for safe parameterized queries
|
|
218
|
+
|
|
219
|
+
Returns
|
|
220
|
+
-------
|
|
221
|
+
List of dictionaries representing query result rows
|
|
222
|
+
|
|
223
|
+
Raises
|
|
224
|
+
------
|
|
225
|
+
RuntimeError
|
|
226
|
+
If attempting write operation in read-only mode
|
|
227
|
+
"""
|
|
228
|
+
# Check for write operations in read-only mode (before connection check)
|
|
229
|
+
query_upper = query.strip().upper()
|
|
230
|
+
write_keywords = ["INSERT", "UPDATE", "DELETE", "DROP", "CREATE", "ALTER", "TRUNCATE"]
|
|
231
|
+
if self.read_only and any(query_upper.startswith(kw) for kw in write_keywords):
|
|
232
|
+
self._check_read_only(f"query: {query_upper.split()[0]}")
|
|
233
|
+
|
|
234
|
+
if not self.pool:
|
|
235
|
+
raise RuntimeError("Not connected to database")
|
|
236
|
+
|
|
237
|
+
async with self.pool.acquire() as conn:
|
|
238
|
+
# Convert named parameters to positional
|
|
239
|
+
if params:
|
|
240
|
+
# Replace :param with $1, $2, etc.
|
|
241
|
+
param_list = []
|
|
242
|
+
for i, (key, value) in enumerate(params.items(), 1):
|
|
243
|
+
query = query.replace(f":{key}", f"${i}")
|
|
244
|
+
param_list.append(value)
|
|
245
|
+
rows = await conn.fetch(query, *param_list)
|
|
246
|
+
else:
|
|
247
|
+
rows = await conn.fetch(query)
|
|
248
|
+
|
|
249
|
+
return [dict(row) for row in rows]
|
|
250
|
+
|
|
251
|
+
# SupportsVectorSearch protocol methods
|
|
252
|
+
async def avector_search(
|
|
253
|
+
self,
|
|
254
|
+
collection: str,
|
|
255
|
+
query_vector: list[float],
|
|
256
|
+
top_k: int = 10,
|
|
257
|
+
filters: dict[str, Any] | None = None,
|
|
258
|
+
include_metadata: bool = True,
|
|
259
|
+
include_vectors: bool = False,
|
|
260
|
+
) -> list[dict[str, Any]]:
|
|
261
|
+
"""Perform vector similarity search using pgvector.
|
|
262
|
+
|
|
263
|
+
Args
|
|
264
|
+
----
|
|
265
|
+
collection: Name of the table with vector column
|
|
266
|
+
query_vector: Query embedding vector
|
|
267
|
+
top_k: Number of nearest neighbors to return
|
|
268
|
+
filters: Optional metadata filters (WHERE clause conditions)
|
|
269
|
+
include_metadata: Whether to include metadata in results
|
|
270
|
+
include_vectors: Whether to include vectors in results
|
|
271
|
+
|
|
272
|
+
Returns
|
|
273
|
+
-------
|
|
274
|
+
List of search results with similarity scores
|
|
275
|
+
"""
|
|
276
|
+
if not self.pool:
|
|
277
|
+
raise RuntimeError("Not connected to database")
|
|
278
|
+
|
|
279
|
+
async with self.pool.acquire() as conn:
|
|
280
|
+
# Build SELECT clause
|
|
281
|
+
select_cols = ["id"]
|
|
282
|
+
if include_metadata:
|
|
283
|
+
select_cols.append("metadata")
|
|
284
|
+
if include_vectors:
|
|
285
|
+
select_cols.append("embedding")
|
|
286
|
+
|
|
287
|
+
# Add distance calculation
|
|
288
|
+
select_clause = ", ".join(select_cols)
|
|
289
|
+
select_clause += ", embedding <-> $1::vector AS distance"
|
|
290
|
+
|
|
291
|
+
# Build WHERE clause
|
|
292
|
+
where_clause = ""
|
|
293
|
+
filter_params = []
|
|
294
|
+
if filters:
|
|
295
|
+
conditions = []
|
|
296
|
+
for param_idx, (key, value) in enumerate(filters.items(), start=2):
|
|
297
|
+
conditions.append(f"metadata->>'{key}' = ${param_idx}")
|
|
298
|
+
filter_params.append(value)
|
|
299
|
+
where_clause = "WHERE " + " AND ".join(conditions)
|
|
300
|
+
|
|
301
|
+
# Build final query
|
|
302
|
+
query = f"""
|
|
303
|
+
SELECT {select_clause}
|
|
304
|
+
FROM {collection}
|
|
305
|
+
{where_clause}
|
|
306
|
+
ORDER BY embedding <-> $1::vector
|
|
307
|
+
LIMIT {top_k}
|
|
308
|
+
""" # nosec B608 - Collection name validated, parameters properly escaped with $N placeholders
|
|
309
|
+
|
|
310
|
+
# Execute query
|
|
311
|
+
rows = await conn.fetch(query, query_vector, *filter_params)
|
|
312
|
+
|
|
313
|
+
# Format results
|
|
314
|
+
results = []
|
|
315
|
+
for row in rows:
|
|
316
|
+
result: dict[str, Any] = {
|
|
317
|
+
"id": row["id"],
|
|
318
|
+
"score": 1.0 - float(row["distance"]), # Convert distance to similarity
|
|
319
|
+
}
|
|
320
|
+
if include_metadata and "metadata" in row:
|
|
321
|
+
result["metadata"] = row["metadata"]
|
|
322
|
+
if include_vectors and "embedding" in row:
|
|
323
|
+
result["vector"] = list(row["embedding"])
|
|
324
|
+
results.append(result)
|
|
325
|
+
|
|
326
|
+
return results
|
|
327
|
+
|
|
328
|
+
async def avector_upsert(
|
|
329
|
+
self,
|
|
330
|
+
collection: str,
|
|
331
|
+
vectors: list[dict[str, Any]],
|
|
332
|
+
) -> dict[str, Any]:
|
|
333
|
+
"""Insert or update vectors in a collection.
|
|
334
|
+
|
|
335
|
+
Args
|
|
336
|
+
----
|
|
337
|
+
collection: Name of the table to upsert into
|
|
338
|
+
vectors: List of vectors with id, vector, and optional metadata
|
|
339
|
+
|
|
340
|
+
Returns
|
|
341
|
+
-------
|
|
342
|
+
Dictionary with upsert statistics
|
|
343
|
+
|
|
344
|
+
Raises
|
|
345
|
+
------
|
|
346
|
+
RuntimeError
|
|
347
|
+
If adapter is in read-only mode
|
|
348
|
+
"""
|
|
349
|
+
self._check_read_only("vector upsert")
|
|
350
|
+
|
|
351
|
+
if not self.pool:
|
|
352
|
+
raise RuntimeError("Not connected to database")
|
|
353
|
+
|
|
354
|
+
upserted_count = 0
|
|
355
|
+
updated_count = 0
|
|
356
|
+
failed_count = 0
|
|
357
|
+
|
|
358
|
+
async with self.pool.acquire() as conn:
|
|
359
|
+
for vec_data in vectors:
|
|
360
|
+
try:
|
|
361
|
+
vec_id = vec_data["id"]
|
|
362
|
+
vector = vec_data["vector"]
|
|
363
|
+
metadata = vec_data.get("metadata", {})
|
|
364
|
+
|
|
365
|
+
# Upsert using ON CONFLICT
|
|
366
|
+
result = await conn.execute(
|
|
367
|
+
f"""
|
|
368
|
+
INSERT INTO {collection} (id, embedding, metadata)
|
|
369
|
+
VALUES ($1, $2::vector, $3)
|
|
370
|
+
ON CONFLICT (id) DO UPDATE
|
|
371
|
+
SET embedding = EXCLUDED.embedding, metadata = EXCLUDED.metadata
|
|
372
|
+
""", # nosec B608 - Collection name validated, all data properly parameterized with $N placeholders
|
|
373
|
+
vec_id,
|
|
374
|
+
vector,
|
|
375
|
+
metadata,
|
|
376
|
+
)
|
|
377
|
+
|
|
378
|
+
# Check if it was an insert or update
|
|
379
|
+
if "INSERT" in result:
|
|
380
|
+
upserted_count += 1
|
|
381
|
+
else:
|
|
382
|
+
updated_count += 1
|
|
383
|
+
|
|
384
|
+
except Exception as e:
|
|
385
|
+
logger.error(f"Failed to upsert vector {vec_data.get('id')}: {e}")
|
|
386
|
+
failed_count += 1
|
|
387
|
+
|
|
388
|
+
return {
|
|
389
|
+
"upserted_count": upserted_count,
|
|
390
|
+
"updated_count": updated_count,
|
|
391
|
+
"failed_count": failed_count,
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
async def avector_delete(
|
|
395
|
+
self,
|
|
396
|
+
collection: str,
|
|
397
|
+
ids: list[str] | None = None,
|
|
398
|
+
filters: dict[str, Any] | None = None,
|
|
399
|
+
) -> dict[str, Any]:
|
|
400
|
+
"""Delete vectors from a collection.
|
|
401
|
+
|
|
402
|
+
Args
|
|
403
|
+
----
|
|
404
|
+
collection: Name of the table to delete from
|
|
405
|
+
ids: Optional list of document IDs to delete
|
|
406
|
+
filters: Optional metadata filters for bulk deletion
|
|
407
|
+
|
|
408
|
+
Returns
|
|
409
|
+
-------
|
|
410
|
+
Dictionary with deletion statistics
|
|
411
|
+
|
|
412
|
+
Raises
|
|
413
|
+
------
|
|
414
|
+
RuntimeError
|
|
415
|
+
If adapter is in read-only mode
|
|
416
|
+
"""
|
|
417
|
+
self._check_read_only("vector delete")
|
|
418
|
+
|
|
419
|
+
if not self.pool:
|
|
420
|
+
raise RuntimeError("Not connected to database")
|
|
421
|
+
|
|
422
|
+
if not ids and not filters:
|
|
423
|
+
raise ValueError("Either ids or filters must be provided")
|
|
424
|
+
|
|
425
|
+
async with self.pool.acquire() as conn:
|
|
426
|
+
if ids:
|
|
427
|
+
# Delete by IDs
|
|
428
|
+
result = await conn.execute(
|
|
429
|
+
f"DELETE FROM {collection} WHERE id = ANY($1)", # nosec B608 - Collection validated, ids parameterized
|
|
430
|
+
ids,
|
|
431
|
+
)
|
|
432
|
+
else:
|
|
433
|
+
# Delete by metadata filters
|
|
434
|
+
conditions = []
|
|
435
|
+
params = []
|
|
436
|
+
for i, (key, value) in enumerate(filters.items(), 1): # type: ignore
|
|
437
|
+
conditions.append(f"metadata->>'{key}' = ${i}")
|
|
438
|
+
params.append(value)
|
|
439
|
+
|
|
440
|
+
where_clause = " AND ".join(conditions)
|
|
441
|
+
result = await conn.execute(
|
|
442
|
+
f"DELETE FROM {collection} WHERE {where_clause}", # nosec B608 - Collection validated, params escaped
|
|
443
|
+
*params,
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
# Extract deleted count from result string like "DELETE 5"
|
|
447
|
+
deleted_count = int(result.split()[-1]) if result and result.split() else 0
|
|
448
|
+
|
|
449
|
+
return {"deleted_count": deleted_count}
|
|
450
|
+
|
|
451
|
+
async def __aenter__(self) -> "PgVectorAdapter":
|
|
452
|
+
"""Async context manager entry."""
|
|
453
|
+
await self.connect()
|
|
454
|
+
return self
|
|
455
|
+
|
|
456
|
+
async def __aexit__(
|
|
457
|
+
self,
|
|
458
|
+
_exc_type: Any,
|
|
459
|
+
_exc_val: Any,
|
|
460
|
+
_exc_tb: Any,
|
|
461
|
+
) -> None:
|
|
462
|
+
"""Async context manager exit."""
|
|
463
|
+
await self.disconnect()
|
|
464
|
+
|
|
465
|
+
# SupportsReadOnly protocol method
|
|
466
|
+
async def is_read_only(self) -> bool:
|
|
467
|
+
"""Check if the adapter is in read-only mode.
|
|
468
|
+
|
|
469
|
+
Returns
|
|
470
|
+
-------
|
|
471
|
+
True if adapter is read-only, False otherwise
|
|
472
|
+
"""
|
|
473
|
+
return bool(self.read_only)
|
|
474
|
+
|
|
475
|
+
def __repr__(self) -> str:
|
|
476
|
+
"""Return string representation."""
|
|
477
|
+
mode = "read-only" if self.read_only else "read-write"
|
|
478
|
+
return f"PgVectorAdapter(host='{self.host}', database='{self.database}', mode='{mode}')"
|