hexdag 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
@@ -0,0 +1,502 @@
1
+ """PostgreSQL pgvector adapter using SQLAlchemy.
2
+
3
+ Production-ready vector store built on SQLAlchemy's battle-tested connection pool
4
+ and the official pgvector Python library.
5
+ """
6
+
7
+ import time
8
+ from typing import Any
9
+
10
+ from pydantic import ConfigDict, SecretStr, field_validator
11
+ from sqlalchemy import Column, Integer, Text, text
12
+ from sqlalchemy.dialects.postgresql import JSONB
13
+ from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine
14
+ from sqlalchemy.orm import declarative_base
15
+
16
+ from hexdag.core.configurable import AdapterConfig, ConfigurableAdapter, SecretField
17
+ from hexdag.core.ports.healthcheck import HealthStatus
18
+ from hexdag.core.registry.decorators import adapter
19
+ from hexdag.core.utils.sql_validation import validate_sql_identifier
20
+
21
+ try:
22
+ from pgvector.sqlalchemy import Vector
23
+ except ImportError:
24
+ Vector = None # type: ignore[assignment,misc]
25
+
26
+ from hexdag_plugins.storage.ports import VectorStorePort
27
+
28
+ Base = declarative_base()
29
+
30
+
31
class PgVectorConfig(AdapterConfig):
    """Configuration for the PgVector adapter with SQLAlchemy.

    Attributes
    ----------
    connection_string : SecretStr | None
        PostgreSQL connection string with asyncpg driver
        (e.g., "postgresql+asyncpg://user:pass@localhost/db").
        Declared through ``SecretField`` with the
        ``PGVECTOR_CONNECTION_STRING`` environment variable.
    table_name : str
        Name of the vector table (default: "document_embeddings")
    embedding_dim : int
        Dimension of embedding vectors (default: 384)
    max_results : int
        Maximum number of search results (default: 5)
    distance_metric : str
        Distance metric: "cosine", "l2", or "inner_product" (default: "cosine")
    pool_size : int
        SQLAlchemy connection pool size (default: 5)
    max_overflow : int
        Maximum overflow connections beyond pool_size (default: 10)
    pool_timeout : float
        Timeout for getting connection from pool (default: 30.0)
    pool_recycle : int
        Recycle connections after N seconds (default: 3600 - 1 hour)
    pool_pre_ping : bool
        Test connections before using them (default: True)
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    connection_string: SecretStr | None = SecretField(
        env_var="PGVECTOR_CONNECTION_STRING",
        description="PostgreSQL connection string (postgresql+asyncpg://...)",
    )
    table_name: str = "document_embeddings"
    embedding_dim: int = 384
    max_results: int = 5
    distance_metric: str = "cosine"

    # SQLAlchemy pool configuration
    pool_size: int = 5
    max_overflow: int = 10
    pool_timeout: float = 30.0
    pool_recycle: int = 3600
    pool_pre_ping: bool = True

    @field_validator("table_name")
    @classmethod
    def validate_table_name(cls, v: str) -> str:
        """Validate the table name before it is interpolated into SQL.

        Raises
        ------
        ValueError
            If the name is longer than 63 characters. Whatever
            ``validate_sql_identifier`` raises for an invalid identifier
            propagates unchanged.
        """
        validate_sql_identifier(v, identifier_type="table", raise_on_invalid=True)
        if len(v) > 63:
            msg = f"Table name '{v}' exceeds PostgreSQL limit of 63 characters"
            raise ValueError(msg)
        return v

    @field_validator("distance_metric")
    @classmethod
    def validate_distance_metric(cls, v: str) -> str:
        """Validate that the distance metric is one of the supported names.

        Raises
        ------
        ValueError
            If ``v`` is not "cosine", "l2" or "inner_product".
        """
        # An ordered tuple (not a set) keeps the error message identical
        # across interpreter runs regardless of string hash randomisation.
        valid_metrics = ("cosine", "l2", "inner_product")
        if v not in valid_metrics:
            msg = f"Invalid distance metric: '{v}'. Must be one of: {', '.join(valid_metrics)}"
            raise ValueError(msg)
        return v
96
+
97
+
98
@adapter("vector_store", name="pgvector", namespace="plugin")
class PgVectorAdapter(ConfigurableAdapter, VectorStorePort):
    """PostgreSQL pgvector adapter using SQLAlchemy.

    Built on SQLAlchemy's async engine with connection pooling for production use.
    Uses the official pgvector library for vector operations.

    Benefits
    --------
    - SQLAlchemy's mature connection pooling (5-20 connections)
    - Automatic connection health checks (pool_pre_ping)
    - Connection recycling to prevent stale connections
    - Type-safe ORM with pgvector support
    - Production-ready error handling

    Examples
    --------
    Basic usage with connection pooling::

        from hexdag.core.registry import registry

        pgvector = registry.get("pgvector", namespace="plugin")(
            connection_string="postgresql+asyncpg://localhost/mydb",
            pool_size=10,
            max_overflow=20
        )
        await pgvector.asetup()

        # Add documents
        await pgvector.aadd_documents(documents, embeddings)

        # Search with vector similarity
        results = await pgvector.asearch(
            "query text",
            query_embedding=embedding,
            top_k=5
        )

    Configuration options::

        pgvector = registry.get("pgvector", namespace="plugin")(
            connection_string="postgresql+asyncpg://localhost/mydb",
            table_name="embeddings",
            embedding_dim=1536,
            pool_size=20,           # Connection pool size
            max_overflow=10,        # Extra connections if needed
            pool_recycle=3600,      # Recycle after 1 hour
            pool_pre_ping=True,     # Check connection health
            distance_metric="cosine"
        )
    """

    Config = PgVectorConfig

    def __init__(self, **kwargs: Any) -> None:
        """Initialize the adapter; the engine itself is created by ``asetup``.

        Raises
        ------
        ImportError
            If the optional ``pgvector`` package could not be imported.
        """
        if Vector is None:
            msg = (
                "pgvector is required for PgVector adapter. "
                "Install it with: uv pip install pgvector"
            )
            raise ImportError(msg)

        super().__init__(**kwargs)
        self._engine: AsyncEngine | None = None
        self._model_class: Any = None
        self._initialized = False

    def _create_model(self):
        """Create the SQLAlchemy model for the configured table and dimension."""
        table_name = self.config.table_name
        embedding_dim = self.config.embedding_dim

        class EmbeddingModel(Base):
            __tablename__ = table_name
            __table_args__ = {"extend_existing": True}

            id = Column(Integer, primary_key=True)
            text = Column(Text, nullable=False)
            embedding = Column(Vector(embedding_dim))
            # ``metadata`` is a reserved attribute name on declarative classes
            # (it holds the MetaData registry), so the JSONB column keeps its
            # database name "metadata" but is mapped as ``doc_metadata``.
            # ``default=dict`` yields a fresh dict per row instead of sharing one.
            doc_metadata = Column("metadata", JSONB, default=dict)

        return EmbeddingModel

    async def _ensure_ready(self) -> None:
        """Run ``asetup`` lazily the first time the adapter is used."""
        if not self._initialized:
            await self.asetup()

    async def asetup(self) -> None:
        """Create the engine, the pgvector extension, the table and its index.

        Calling this on an already initialized adapter is a no-op, so an
        existing engine (and its pool) is never replaced and leaked.

        Raises
        ------
        ValueError
            If no connection string is configured.
        """
        if self._initialized:
            return

        if self.config.connection_string is None:
            msg = "connection_string is required for PgVector adapter"
            raise ValueError(msg)

        model = self._create_model()
        table_name = self.config.table_name
        index_name = f"{table_name}_embedding_idx"
        operator_class = self._get_index_operator_class()

        # Create async engine with connection pool
        engine = create_async_engine(
            self.config.connection_string.get_secret_value(),
            pool_size=self.config.pool_size,
            max_overflow=self.config.max_overflow,
            pool_timeout=self.config.pool_timeout,
            pool_recycle=self.config.pool_recycle,
            pool_pre_ping=self.config.pool_pre_ping,
            echo=False,  # Set to True for SQL debugging
        )

        try:
            async with engine.begin() as conn:
                await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
                # Create only this adapter's table rather than every table
                # registered on the shared declarative base.
                await conn.run_sync(model.__table__.create, checkfirst=True)
                # ivfflat takes an operator *class* (vector_cosine_ops, ...),
                # not the distance operator. table_name is validated by the
                # config, so interpolating it here is safe.
                await conn.execute(
                    text(
                        f"CREATE INDEX IF NOT EXISTS {index_name} "
                        f"ON {table_name} "
                        f"USING ivfflat (embedding {operator_class}) "
                        "WITH (lists = 100)"
                    )
                )
        except BaseException:
            # Do not leak the pool when setup fails or is cancelled.
            await engine.dispose()
            raise

        self._engine = engine
        self._model_class = model
        self._initialized = True

    async def aclose(self) -> None:
        """Close SQLAlchemy engine and connection pool."""
        if self._engine:
            await self._engine.dispose()
            self._engine = None
            self._initialized = False

    async def aadd_documents(
        self,
        documents: list[dict[str, Any]],
        embeddings: list[list[float]] | None = None,
    ) -> dict[str, Any]:
        """Add documents with embeddings to the vector store.

        Parameters
        ----------
        documents : list[dict[str, Any]]
            Each document is read through its optional ``"text"`` and
            ``"metadata"`` keys.
        embeddings : list[list[float]] | None
            One embedding per document, in the same order. Required.

        Returns
        -------
        dict[str, Any]
            ``{"added": <number of documents>, "ids": <primary keys>}``.

        Raises
        ------
        ValueError
            If ``embeddings`` is missing or its length differs from
            ``documents``.
        """
        if embeddings is None:
            msg = (
                "Embeddings are required for PgVector adapter. Example: embeddings = "
                "await embedding_adapter.aembed_texts([doc['text'] for doc in documents])"
            )
            raise ValueError(msg)

        if len(documents) != len(embeddings):
            msg = f"Mismatch: {len(documents)} documents but {len(embeddings)} embeddings"
            raise ValueError(msg)

        await self._ensure_ready()

        model = self._model_class
        records = [
            model(
                text=doc.get("text", ""),
                embedding=embedding,
                doc_metadata=doc.get("metadata", {}),
            )
            for doc, embedding in zip(documents, embeddings, strict=True)
        ]

        # expire_on_commit=False keeps the primary keys assigned during the
        # flush readable after commit without another round trip.
        async with AsyncSession(self._engine, expire_on_commit=False) as session:
            session.add_all(records)
            await session.commit()

        return {
            "added": len(documents),
            "ids": [record.id for record in records],
        }

    async def asearch(
        self,
        query: str,
        query_embedding: list[float] | None = None,
        top_k: int | None = None,
        filter_metadata: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Search for similar documents using vector similarity.

        Parameters
        ----------
        query : str
            Not used by this adapter; only ``query_embedding`` drives the search.
        query_embedding : list[float] | None
            Embedding of the query. Required.
        top_k : int | None
            Number of results; falls back to ``config.max_results`` when
            ``None`` or ``0``.
        filter_metadata : dict[str, Any] | None
            Exact-match filters; each value is compared as text against the
            corresponding metadata key.

        Returns
        -------
        list[dict[str, Any]]
            Results ordered by ascending distance, each with ``id``, ``text``,
            ``metadata`` and ``similarity_score``.

        Raises
        ------
        ValueError
            If ``query_embedding`` is missing.
        """
        from sqlalchemy import select

        if query_embedding is None:
            msg = (
                "Query embedding is required for PgVector search. "
                "Example: query_embedding = await embedding_adapter.aembed_text(query)"
            )
            raise ValueError(msg)

        await self._ensure_ready()

        model = self._model_class
        limit = top_k or self.config.max_results
        metric = self.config.distance_metric

        # Let PostgreSQL compute the distance once and return it with each row.
        distance = self._get_distance_function()(model.embedding, query_embedding).label(
            "distance"
        )

        stmt = select(model, distance)
        for key, value in (filter_metadata or {}).items():
            stmt = stmt.where(model.doc_metadata[key].astext == str(value))
        stmt = stmt.order_by(distance).limit(limit)

        async with AsyncSession(self._engine) as session:
            rows = (await session.execute(stmt)).all()
            return [
                {
                    "id": record.id,
                    "text": record.text,
                    "metadata": record.doc_metadata or {},
                    "similarity_score": self._to_similarity(metric, float(row_distance)),
                }
                for record, row_distance in rows
            ]

    @staticmethod
    def _to_similarity(metric: str, distance: float) -> float:
        """Convert a pgvector distance into the reported similarity score."""
        if metric == "inner_product":
            # The <#> operator yields the *negative* inner product.
            return -distance
        # cosine and l2: smaller distance means higher score.
        return 1.0 - distance

    async def aclear(self) -> dict[str, Any]:
        """Clear all documents from the table.

        Returns
        -------
        dict[str, Any]
            ``{"removed": <row count before truncation>}``.
        """
        await self._ensure_ready()

        table_name = self.config.table_name
        async with AsyncSession(self._engine) as session:
            # Count before deletion
            count = await session.scalar(text(f"SELECT COUNT(*) FROM {table_name}"))

            # Truncate table
            await session.execute(text(f"TRUNCATE TABLE {table_name}"))
            await session.commit()

        return {"removed": count or 0}

    async def acount(self) -> int:
        """Get the number of documents in the vector store."""
        await self._ensure_ready()

        async with AsyncSession(self._engine) as session:
            count = await session.scalar(text(f"SELECT COUNT(*) FROM {self.config.table_name}"))
            return count or 0

    def _pool_stats(self) -> dict[str, Any]:
        """Return connection-pool counters, or an empty dict without an engine."""
        if not (self._engine and self._engine.pool):
            return {}
        pool = self._engine.pool
        return {
            "pool_size": pool.size(),
            "checked_in": pool.checkedin(),
            "checked_out": pool.checkedout(),
            "overflow": pool.overflow(),
            "max_overflow": self.config.max_overflow,
        }

    async def aget_stats(self) -> dict[str, Any]:
        """Get statistics about the vector store and connection pool."""
        await self._ensure_ready()

        table_name = self.config.table_name
        async with AsyncSession(self._engine) as session:
            # Get document count
            count = await session.scalar(text(f"SELECT COUNT(*) FROM {table_name}"))

            # Get table size
            size = await session.scalar(
                text(f"SELECT pg_size_pretty(pg_total_relation_size('{table_name}'))")
            )

        return {
            "document_count": count,
            "table_name": table_name,
            "table_size": size,
            "embedding_dim": self.config.embedding_dim,
            "distance_metric": self.config.distance_metric,
            "connection_pool": self._pool_stats(),
        }

    def _get_distance_operator(self) -> str:
        """Get PostgreSQL distance operator for the configured metric."""
        operators = {
            "cosine": "<=>",
            "l2": "<->",
            "inner_product": "<#>",
        }
        return operators.get(self.config.distance_metric, "<=>")

    def _get_index_operator_class(self) -> str:
        """Get the pgvector index operator class for the configured metric."""
        operator_classes = {
            "cosine": "vector_cosine_ops",
            "l2": "vector_l2_ops",
            "inner_product": "vector_ip_ops",
        }
        return operator_classes.get(self.config.distance_metric, "vector_cosine_ops")

    def _get_distance_function(self):
        """Get a callable building the SQL distance expression for the metric.

        Returns
        -------
        Callable
            ``f(column, vector)`` returning ``column.<distance method>(vector)``.
            The distance methods live on the vector column's comparator, not on
            the ``Vector`` type itself, hence the indirection through the column.
        """
        metric_map = {
            "cosine": "cosine_distance",
            "l2": "l2_distance",
            "inner_product": "max_inner_product",
        }
        method_name = metric_map.get(self.config.distance_metric, "cosine_distance")

        def distance(column: Any, vector: Any) -> Any:
            return getattr(column, method_name)(vector)

        return distance

    async def ahealth_check(self) -> HealthStatus:
        """Check PostgreSQL, pgvector, and connection pool health.

        Any exception is reported as an "unhealthy" status instead of being
        raised. Note that an uninitialized adapter is set up first.
        """
        # perf_counter is monotonic, so latency is immune to clock adjustments.
        start_time = time.perf_counter()
        table_name = self.config.table_name

        try:
            await self._ensure_ready()

            async with AsyncSession(self._engine) as session:
                # Test basic connectivity
                await session.execute(text("SELECT 1"))

                # Test pgvector extension
                await session.execute(text("SELECT vector_dims(vector('[1,2,3]'))"))

                # Test table exists
                table_exists = await session.scalar(
                    text(
                        """
                        SELECT EXISTS (
                            SELECT FROM information_schema.tables
                            WHERE table_name = :table_name
                        )
                        """
                    ),
                    {"table_name": table_name},
                )

                latency_ms = (time.perf_counter() - start_time) * 1000

                if not table_exists:
                    return HealthStatus(
                        status="degraded",
                        adapter_name="pgvector",
                        port_name="vector_store",
                        latency_ms=latency_ms,
                        details={
                            "table": table_name,
                            "message": "Table does not exist (not yet initialized)",
                        },
                    )

                # Count on the session already held: checking out a second
                # connection here could stall on a fully used pool.
                count = await session.scalar(text(f"SELECT COUNT(*) FROM {table_name}")) or 0

            return HealthStatus(
                status="healthy",
                adapter_name="pgvector",
                port_name="vector_store",
                latency_ms=latency_ms,
                details={
                    "table": table_name,
                    "document_count": count,
                    "embedding_dim": self.config.embedding_dim,
                    "distance_metric": self.config.distance_metric,
                    "connection_pool": self._pool_stats(),
                    "sqlalchemy_version": "async",
                },
            )

        except Exception as e:
            latency_ms = (time.perf_counter() - start_time) * 1000
            return HealthStatus(
                status="unhealthy",
                adapter_name="pgvector",
                port_name="vector_store",
                error=e,
                latency_ms=latency_ms,
                details={
                    "table": table_name,
                    "error_type": type(e).__name__,
                },
            )

    def __repr__(self) -> str:
        """String representation, including pool usage once an engine exists."""
        pool_info = ""
        if self._engine and self._engine.pool:
            pool = self._engine.pool
            pool_info = f", pool={pool.checkedout()}/{pool.size()}"
        return f"PgVectorAdapter(table={self.config.table_name}{pool_info})"