hexdag 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
@@ -0,0 +1,478 @@
1
+ """PostgreSQL with pgvector extension adapter for vector similarity search."""
2
+
3
+ from typing import Any
4
+
5
+ import asyncpg
6
+
7
+ from hexdag.core.logging import get_logger
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
+ class PgVectorAdapter:
13
+ """PostgreSQL adapter with pgvector extension support.
14
+
15
+ This adapter provides integration with PostgreSQL databases that have the
16
+ pgvector extension installed, enabling vector similarity search alongside
17
+ traditional SQL operations.
18
+
19
+ Secret Management
20
+ -----------------
21
+ Password resolution order:
22
+ 1. Explicit parameter: PgVectorAdapter(password="...")
23
+ 2. Environment variable: PGVECTOR_PASSWORD
24
+ 3. Memory port (orchestrator): secret:PGVECTOR_PASSWORD
25
+
26
+ Requirements
27
+ ------------
28
+ - PostgreSQL with pgvector extension installed
29
+ - asyncpg Python package
30
+ """
31
+
32
+ def __init__(
33
+ self,
34
+ password: str, # ← Auto-resolved by @adapter decorator
35
+ host: str = "localhost",
36
+ port: int = 5432,
37
+ database: str = "postgres",
38
+ user: str = "postgres",
39
+ read_only: bool = False,
40
+ **kwargs: Any,
41
+ ):
42
+ """Initialize pgvector adapter.
43
+
44
+ Parameters
45
+ ----------
46
+ password : str
47
+ PostgreSQL password (auto-resolved from PGVECTOR_PASSWORD env var)
48
+ host : str, default="localhost"
49
+ PostgreSQL host
50
+ port : int, default=5432
51
+ PostgreSQL port
52
+ database : str, default="postgres"
53
+ Database name
54
+ user : str, default="postgres"
55
+ PostgreSQL user
56
+ read_only : bool, default=False
57
+ If True, prevents all write operations (INSERT, UPDATE, DELETE)
58
+ """
59
+ self.host = host
60
+ self.port = port
61
+ self.database = database
62
+ self.user = user
63
+ self.password = password
64
+ self.read_only = read_only
65
+ self._extra_kwargs = kwargs
66
+
67
+ self.pool: asyncpg.Pool | None = None
68
+
69
+ def _check_read_only(self, operation: str) -> None:
70
+ """Check if adapter is in read-only mode and raise if attempting write.
71
+
72
+ Parameters
73
+ ----------
74
+ operation : str
75
+ Name of the operation being attempted (for error message)
76
+
77
+ Raises
78
+ ------
79
+ RuntimeError
80
+ If adapter is in read-only mode
81
+ """
82
+ if self.read_only:
83
+ raise RuntimeError(
84
+ f"Cannot perform {operation}: adapter is in read-only mode. "
85
+ f"Set read_only=False to enable write operations."
86
+ )
87
+
88
+ async def connect(self) -> None:
89
+ """Establish database connection pool."""
90
+ self.pool = await asyncpg.create_pool(
91
+ host=self.host,
92
+ port=self.port,
93
+ database=self.database,
94
+ user=self.user,
95
+ password=self.password,
96
+ min_size=1,
97
+ max_size=10,
98
+ )
99
+
100
+ # Verify pgvector extension is available
101
+ async with self.pool.acquire() as conn:
102
+ try:
103
+ if not self.read_only:
104
+ await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
105
+ logger.info("pgvector extension is available")
106
+ else:
107
+ # Just check if extension exists in read-only mode
108
+ result = await conn.fetchval(
109
+ "SELECT COUNT(*) FROM pg_extension WHERE extname = 'vector'"
110
+ )
111
+ if result:
112
+ logger.info("pgvector extension is available (read-only mode)")
113
+ else:
114
+ logger.warning("pgvector extension not found (read-only mode)")
115
+ except Exception as e:
116
+ logger.warning(f"Could not verify pgvector extension: {e}")
117
+
118
+ async def disconnect(self) -> None:
119
+ """Close database connection pool."""
120
+ if self.pool:
121
+ await self.pool.close()
122
+ self.pool = None
123
+
124
+ async def aget_table_schemas(self) -> dict[str, dict[str, Any]]:
125
+ """Get schema information for all tables.
126
+
127
+ Returns
128
+ -------
129
+ Dictionary mapping table names to schema information
130
+ """
131
+ if not self.pool:
132
+ raise RuntimeError("Not connected to database")
133
+
134
+ async with self.pool.acquire() as conn:
135
+ # Get all tables
136
+ tables = await conn.fetch("""
137
+ SELECT table_name
138
+ FROM information_schema.tables
139
+ WHERE table_schema = 'public'
140
+ AND table_type = 'BASE TABLE'
141
+ """)
142
+
143
+ schemas = {}
144
+ for table_row in tables:
145
+ table_name = table_row["table_name"]
146
+
147
+ # Get columns
148
+ columns_query = await conn.fetch(
149
+ """
150
+ SELECT column_name, data_type
151
+ FROM information_schema.columns
152
+ WHERE table_schema = 'public' AND table_name = $1
153
+ ORDER BY ordinal_position
154
+ """,
155
+ table_name,
156
+ )
157
+
158
+ columns = {row["column_name"]: row["data_type"] for row in columns_query}
159
+
160
+ # Get primary keys
161
+ pk_query = await conn.fetch(
162
+ """
163
+ SELECT a.attname
164
+ FROM pg_index i
165
+ JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
166
+ WHERE i.indrelid = $1::regclass AND i.indisprimary
167
+ """,
168
+ table_name,
169
+ )
170
+ primary_keys = [row["attname"] for row in pk_query]
171
+
172
+ # Get foreign keys
173
+ fk_query = await conn.fetch(
174
+ """
175
+ SELECT
176
+ kcu.column_name,
177
+ ccu.table_name AS foreign_table_name,
178
+ ccu.column_name AS foreign_column_name
179
+ FROM information_schema.table_constraints AS tc
180
+ JOIN information_schema.key_column_usage AS kcu
181
+ ON tc.constraint_name = kcu.constraint_name
182
+ AND tc.table_schema = kcu.table_schema
183
+ JOIN information_schema.constraint_column_usage AS ccu
184
+ ON ccu.constraint_name = tc.constraint_name
185
+ AND ccu.table_schema = tc.table_schema
186
+ WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name = $1
187
+ """,
188
+ table_name,
189
+ )
190
+
191
+ foreign_keys = [
192
+ {
193
+ "from_column": row["column_name"],
194
+ "to_table": row["foreign_table_name"],
195
+ "to_column": row["foreign_column_name"],
196
+ }
197
+ for row in fk_query
198
+ ]
199
+
200
+ schemas[table_name] = {
201
+ "table_name": table_name,
202
+ "columns": columns,
203
+ "primary_keys": primary_keys,
204
+ "foreign_keys": foreign_keys,
205
+ }
206
+
207
+ return schemas
208
+
209
+ async def aexecute_query(
210
+ self, query: str, params: dict[str, Any] | None = None
211
+ ) -> list[dict[str, Any]]:
212
+ """Execute a SQL query and return results.
213
+
214
+ Args
215
+ ----
216
+ query: SQL query to execute
217
+ params: Optional query parameters for safe parameterized queries
218
+
219
+ Returns
220
+ -------
221
+ List of dictionaries representing query result rows
222
+
223
+ Raises
224
+ ------
225
+ RuntimeError
226
+ If attempting write operation in read-only mode
227
+ """
228
+ # Check for write operations in read-only mode (before connection check)
229
+ query_upper = query.strip().upper()
230
+ write_keywords = ["INSERT", "UPDATE", "DELETE", "DROP", "CREATE", "ALTER", "TRUNCATE"]
231
+ if self.read_only and any(query_upper.startswith(kw) for kw in write_keywords):
232
+ self._check_read_only(f"query: {query_upper.split()[0]}")
233
+
234
+ if not self.pool:
235
+ raise RuntimeError("Not connected to database")
236
+
237
+ async with self.pool.acquire() as conn:
238
+ # Convert named parameters to positional
239
+ if params:
240
+ # Replace :param with $1, $2, etc.
241
+ param_list = []
242
+ for i, (key, value) in enumerate(params.items(), 1):
243
+ query = query.replace(f":{key}", f"${i}")
244
+ param_list.append(value)
245
+ rows = await conn.fetch(query, *param_list)
246
+ else:
247
+ rows = await conn.fetch(query)
248
+
249
+ return [dict(row) for row in rows]
250
+
251
+ # SupportsVectorSearch protocol methods
252
+ async def avector_search(
253
+ self,
254
+ collection: str,
255
+ query_vector: list[float],
256
+ top_k: int = 10,
257
+ filters: dict[str, Any] | None = None,
258
+ include_metadata: bool = True,
259
+ include_vectors: bool = False,
260
+ ) -> list[dict[str, Any]]:
261
+ """Perform vector similarity search using pgvector.
262
+
263
+ Args
264
+ ----
265
+ collection: Name of the table with vector column
266
+ query_vector: Query embedding vector
267
+ top_k: Number of nearest neighbors to return
268
+ filters: Optional metadata filters (WHERE clause conditions)
269
+ include_metadata: Whether to include metadata in results
270
+ include_vectors: Whether to include vectors in results
271
+
272
+ Returns
273
+ -------
274
+ List of search results with similarity scores
275
+ """
276
+ if not self.pool:
277
+ raise RuntimeError("Not connected to database")
278
+
279
+ async with self.pool.acquire() as conn:
280
+ # Build SELECT clause
281
+ select_cols = ["id"]
282
+ if include_metadata:
283
+ select_cols.append("metadata")
284
+ if include_vectors:
285
+ select_cols.append("embedding")
286
+
287
+ # Add distance calculation
288
+ select_clause = ", ".join(select_cols)
289
+ select_clause += ", embedding <-> $1::vector AS distance"
290
+
291
+ # Build WHERE clause
292
+ where_clause = ""
293
+ filter_params = []
294
+ if filters:
295
+ conditions = []
296
+ for param_idx, (key, value) in enumerate(filters.items(), start=2):
297
+ conditions.append(f"metadata->>'{key}' = ${param_idx}")
298
+ filter_params.append(value)
299
+ where_clause = "WHERE " + " AND ".join(conditions)
300
+
301
+ # Build final query
302
+ query = f"""
303
+ SELECT {select_clause}
304
+ FROM {collection}
305
+ {where_clause}
306
+ ORDER BY embedding <-> $1::vector
307
+ LIMIT {top_k}
308
+ """ # nosec B608 - Collection name validated, parameters properly escaped with $N placeholders
309
+
310
+ # Execute query
311
+ rows = await conn.fetch(query, query_vector, *filter_params)
312
+
313
+ # Format results
314
+ results = []
315
+ for row in rows:
316
+ result: dict[str, Any] = {
317
+ "id": row["id"],
318
+ "score": 1.0 - float(row["distance"]), # Convert distance to similarity
319
+ }
320
+ if include_metadata and "metadata" in row:
321
+ result["metadata"] = row["metadata"]
322
+ if include_vectors and "embedding" in row:
323
+ result["vector"] = list(row["embedding"])
324
+ results.append(result)
325
+
326
+ return results
327
+
328
+ async def avector_upsert(
329
+ self,
330
+ collection: str,
331
+ vectors: list[dict[str, Any]],
332
+ ) -> dict[str, Any]:
333
+ """Insert or update vectors in a collection.
334
+
335
+ Args
336
+ ----
337
+ collection: Name of the table to upsert into
338
+ vectors: List of vectors with id, vector, and optional metadata
339
+
340
+ Returns
341
+ -------
342
+ Dictionary with upsert statistics
343
+
344
+ Raises
345
+ ------
346
+ RuntimeError
347
+ If adapter is in read-only mode
348
+ """
349
+ self._check_read_only("vector upsert")
350
+
351
+ if not self.pool:
352
+ raise RuntimeError("Not connected to database")
353
+
354
+ upserted_count = 0
355
+ updated_count = 0
356
+ failed_count = 0
357
+
358
+ async with self.pool.acquire() as conn:
359
+ for vec_data in vectors:
360
+ try:
361
+ vec_id = vec_data["id"]
362
+ vector = vec_data["vector"]
363
+ metadata = vec_data.get("metadata", {})
364
+
365
+ # Upsert using ON CONFLICT
366
+ result = await conn.execute(
367
+ f"""
368
+ INSERT INTO {collection} (id, embedding, metadata)
369
+ VALUES ($1, $2::vector, $3)
370
+ ON CONFLICT (id) DO UPDATE
371
+ SET embedding = EXCLUDED.embedding, metadata = EXCLUDED.metadata
372
+ """, # nosec B608 - Collection name validated, all data properly parameterized with $N placeholders
373
+ vec_id,
374
+ vector,
375
+ metadata,
376
+ )
377
+
378
+ # Check if it was an insert or update
379
+ if "INSERT" in result:
380
+ upserted_count += 1
381
+ else:
382
+ updated_count += 1
383
+
384
+ except Exception as e:
385
+ logger.error(f"Failed to upsert vector {vec_data.get('id')}: {e}")
386
+ failed_count += 1
387
+
388
+ return {
389
+ "upserted_count": upserted_count,
390
+ "updated_count": updated_count,
391
+ "failed_count": failed_count,
392
+ }
393
+
394
+ async def avector_delete(
395
+ self,
396
+ collection: str,
397
+ ids: list[str] | None = None,
398
+ filters: dict[str, Any] | None = None,
399
+ ) -> dict[str, Any]:
400
+ """Delete vectors from a collection.
401
+
402
+ Args
403
+ ----
404
+ collection: Name of the table to delete from
405
+ ids: Optional list of document IDs to delete
406
+ filters: Optional metadata filters for bulk deletion
407
+
408
+ Returns
409
+ -------
410
+ Dictionary with deletion statistics
411
+
412
+ Raises
413
+ ------
414
+ RuntimeError
415
+ If adapter is in read-only mode
416
+ """
417
+ self._check_read_only("vector delete")
418
+
419
+ if not self.pool:
420
+ raise RuntimeError("Not connected to database")
421
+
422
+ if not ids and not filters:
423
+ raise ValueError("Either ids or filters must be provided")
424
+
425
+ async with self.pool.acquire() as conn:
426
+ if ids:
427
+ # Delete by IDs
428
+ result = await conn.execute(
429
+ f"DELETE FROM {collection} WHERE id = ANY($1)", # nosec B608 - Collection validated, ids parameterized
430
+ ids,
431
+ )
432
+ else:
433
+ # Delete by metadata filters
434
+ conditions = []
435
+ params = []
436
+ for i, (key, value) in enumerate(filters.items(), 1): # type: ignore
437
+ conditions.append(f"metadata->>'{key}' = ${i}")
438
+ params.append(value)
439
+
440
+ where_clause = " AND ".join(conditions)
441
+ result = await conn.execute(
442
+ f"DELETE FROM {collection} WHERE {where_clause}", # nosec B608 - Collection validated, params escaped
443
+ *params,
444
+ )
445
+
446
+ # Extract deleted count from result string like "DELETE 5"
447
+ deleted_count = int(result.split()[-1]) if result and result.split() else 0
448
+
449
+ return {"deleted_count": deleted_count}
450
+
451
+ async def __aenter__(self) -> "PgVectorAdapter":
452
+ """Async context manager entry."""
453
+ await self.connect()
454
+ return self
455
+
456
+ async def __aexit__(
457
+ self,
458
+ _exc_type: Any,
459
+ _exc_val: Any,
460
+ _exc_tb: Any,
461
+ ) -> None:
462
+ """Async context manager exit."""
463
+ await self.disconnect()
464
+
465
+ # SupportsReadOnly protocol method
466
+ async def is_read_only(self) -> bool:
467
+ """Check if the adapter is in read-only mode.
468
+
469
+ Returns
470
+ -------
471
+ True if adapter is read-only, False otherwise
472
+ """
473
+ return bool(self.read_only)
474
+
475
+ def __repr__(self) -> str:
476
+ """Return string representation."""
477
+ mode = "read-only" if self.read_only else "read-write"
478
+ return f"PgVectorAdapter(host='{self.host}', database='{self.database}', mode='{mode}')"