hexdag 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
@@ -0,0 +1,223 @@
1
+ """ChromaDB vector store adapter for RAG plugin.
2
+
3
+ ChromaDB is an open-source embedding database that provides:
4
+ - Easy local development and deployment
5
+ - Built-in embedding models
6
+ - Persistent storage
7
+ - Cloud deployment option
8
+
9
+ Installation:
10
+ pip install chromadb
11
+ """
12
+
13
+ from typing import Any
14
+
15
+ from pydantic import Field
16
+
17
+ from hexdag.core import AdapterConfig, ConfigurableAdapter
18
+ from hexdag.core.registry.decorators import adapter
19
+ from hexdag_plugins.storage.ports import VectorStorePort
20
+
21
+
22
class ChromaDBConfig(AdapterConfig):
    """Configuration for ChromaDB adapter.

    Declares the settings that ``ChromaDBAdapter`` reads from ``self.config``.

    Attributes
    ----------
    collection_name : str
        Name of the ChromaDB collection (default: "hexdag_documents")
    persist_directory : str | None
        Directory for persistent storage (None for in-memory)
    embedding_function : str
        Embedding function to use (default: "default")
        Options: "default", "sentence-transformers", "openai"
        NOTE(review): the ChromaDBAdapter defined in this file never reads
        this field, so changing it currently has no visible effect there.
    distance_metric : str
        Distance metric for similarity (default: "cosine")
        Options: "cosine", "l2", "ip" (inner product)
        Any other value is rejected by the ``pattern`` constraint below.
    """

    collection_name: str = "hexdag_documents"
    persist_directory: str | None = None
    embedding_function: str = "default"
    # The anchored regex limits the value to exactly one of the three metrics.
    distance_metric: str = Field(default="cosine", pattern="^(cosine|l2|ip)$")
43
+
44
+
45
@adapter("vector_store", name="chromadb", namespace="plugin")
class ChromaDBAdapter(ConfigurableAdapter, VectorStorePort):
    """ChromaDB vector store adapter.

    Wraps a single ChromaDB collection behind the async vector-store methods
    (``aadd_documents``, ``asearch``, ``aclear``, ``acount``, ``adelete``).
    The client and collection are created lazily: every method calls
    ``asetup`` first if no collection has been opened yet.

    Notes
    -----
    The ChromaDB calls themselves are made synchronously from inside the
    coroutines, so they run on the event loop thread.

    Examples
    --------
    >>> # In-memory ChromaDB
    >>> store = ChromaDBAdapter(collection_name="docs")
    >>> await store.aadd_documents([{"text": "Python programming"}])
    >>> results = await store.asearch("Python", top_k=5)

    >>> # Persistent ChromaDB
    >>> store = ChromaDBAdapter(
    ...     collection_name="docs",
    ...     persist_directory="./chroma_db"
    ... )
    """

    Config = ChromaDBConfig

    def __init__(self, **kwargs):
        """Initialize ChromaDB adapter.

        No connection is opened here; ``asetup`` creates the client and the
        collection on first use.
        """
        super().__init__(**kwargs)
        self._client = None
        self._collection = None

    def _collection_metadata(self) -> dict[str, Any]:
        """Build the metadata used when creating the collection.

        ``hnsw:space`` is the key ChromaDB reads to choose the distance
        function of the index. The ``distance_metric`` key written by earlier
        versions of this adapter is kept alongside it so existing readers of
        the collection metadata still find it.
        """
        metric = self.config.distance_metric
        return {"hnsw:space": metric, "distance_metric": metric}

    async def _ensure_collection(self):
        """Return the open collection, running ``asetup`` first if needed."""
        if self._collection is None:
            await self.asetup()
        return self._collection

    async def asetup(self):
        """Initialize ChromaDB client and collection.

        Raises
        ------
        ImportError
            If the ``chromadb`` package is not installed.
        """
        try:
            import chromadb
            from chromadb.config import Settings
        except ImportError as e:
            msg = "ChromaDB not installed. Install with: pip install chromadb"
            raise ImportError(msg) from e

        # Create client
        persist_directory = self.config.persist_directory
        if persist_directory:
            persistent_client = getattr(chromadb, "PersistentClient", None)
            if persistent_client is not None:
                # Current ChromaDB releases only write to disk through
                # PersistentClient; Settings(persist_directory=...) passed to
                # the plain Client does not enable persistence on its own.
                self._client = persistent_client(
                    path=persist_directory,
                    settings=Settings(anonymized_telemetry=False),
                )
            else:
                # Older releases without PersistentClient: keep the original
                # Settings-based construction.
                self._client = chromadb.Client(
                    Settings(
                        persist_directory=persist_directory,
                        anonymized_telemetry=False,
                    )
                )
        else:
            self._client = chromadb.Client()

        # Get or create collection
        self._collection = self._client.get_or_create_collection(
            name=self.config.collection_name,
            metadata=self._collection_metadata(),
        )

    async def aadd_documents(
        self,
        documents: list[dict[str, Any]],
        embeddings: list[list[float]] | None = None,
    ) -> None:
        """Add documents to ChromaDB.

        ChromaDB can generate embeddings automatically if not provided.

        Args:
            documents: List of documents with 'text' and optional 'metadata'
                and 'id'. Documents without an 'id' get a generated unique one.
            embeddings: Optional pre-computed embeddings (if None or empty,
                ChromaDB generates them)

        Raises:
            KeyError: If a document has no 'text' field.
            ValueError: If embeddings are given but their count differs from
                the number of documents.
        """
        if not documents:
            # Nothing to store; avoid handing ChromaDB empty id/document lists.
            return

        if embeddings and len(embeddings) != len(documents):
            msg = "Number of documents must match number of embeddings"
            raise ValueError(msg)

        import uuid

        collection = await self._ensure_collection()

        texts = [doc["text"] for doc in documents]
        # Positional fallbacks such as "doc_0" repeat on every call and would
        # collide with ids stored by an earlier call, so generate unique ones.
        ids = [doc["id"] if "id" in doc else f"doc_{uuid.uuid4().hex}" for doc in documents]
        # ChromaDB's metadata validation rejects empty dicts in the releases
        # checked; send None for documents that carry no metadata.
        metadatas = [doc.get("metadata") or None for doc in documents]

        payload: dict[str, Any] = {"documents": texts, "ids": ids}
        if any(metadata is not None for metadata in metadatas):
            payload["metadatas"] = metadatas
        if embeddings:
            # Use provided embeddings; otherwise ChromaDB embeds the texts.
            payload["embeddings"] = embeddings

        collection.add(**payload)

    async def asearch(
        self,
        query: str,
        query_embedding: list[float] | None = None,
        top_k: int | None = None,
        filter_metadata: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Search for similar documents in ChromaDB.

        Args:
            query: Search query text
            query_embedding: Optional pre-computed query embedding; when given
                it is used instead of letting ChromaDB embed ``query``
            top_k: Number of results to return (default: 5). Zero or a
                negative value yields an empty list.
            filter_metadata: Optional metadata filters (ChromaDB where clause)

        Returns:
            List of dicts with keys 'id', 'text', 'score' and 'metadata'.
            'score' is ``1.0 - distance`` using the distance ChromaDB reports,
            so its scale depends on the collection's distance metric.
        """
        limit = 5 if top_k is None else top_k
        if limit <= 0:
            return []

        collection = await self._ensure_collection()

        query_kwargs: dict[str, Any] = {
            "n_results": limit,
            "where": filter_metadata or None,
        }
        if query_embedding is not None and len(query_embedding) > 0:
            # Use provided embedding
            query_kwargs["query_embeddings"] = [query_embedding]
        else:
            # Let ChromaDB embed the query
            query_kwargs["query_texts"] = [query]

        results = collection.query(**query_kwargs)

        # Only one query was sent, so the first entry of each field is ours.
        ids = results["ids"][0]
        texts = results["documents"][0]
        distances = results["distances"][0]
        metadatas = results["metadatas"][0] if results["metadatas"] else [None] * len(ids)

        return [
            {
                "id": doc_id,
                "text": text,
                "score": 1.0 - distance,  # Convert distance to similarity
                "metadata": metadata or {},
            }
            for doc_id, text, distance, metadata in zip(
                ids, texts, distances, metadatas, strict=True
            )
        ]

    async def aclear(self) -> None:
        """Clear all documents from the collection.

        The collection is deleted and recreated under the same name.
        """
        await self._ensure_collection()

        # Delete and recreate collection
        self._client.delete_collection(name=self.config.collection_name)
        self._collection = self._client.get_or_create_collection(
            name=self.config.collection_name,
            metadata=self._collection_metadata(),
        )

    async def acount(self) -> int:
        """Get the number of documents in the collection."""
        collection = await self._ensure_collection()
        return collection.count()

    async def adelete(self, ids: list[str]) -> None:
        """Delete documents by ID.

        Args:
            ids: List of document IDs to delete. An empty list is a no-op.
        """
        if not ids:
            # NOTE(review): some ChromaDB releases reportedly treat an empty
            # id list as "no filter"; never forward it.
            return

        collection = await self._ensure_collection()
        collection.delete(ids=ids)

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"ChromaDBAdapter(collection={self.config.collection_name}, "
            f"persist={self.config.persist_directory is not None})"
        )
@@ -0,0 +1,285 @@
1
+ """In-memory vector store for RAG operations."""
2
+
3
+ import hashlib
4
+ import math
5
+ from typing import Any
6
+
7
+ from hexdag.core.configurable import AdapterConfig, ConfigurableAdapter
8
+ from hexdag.core.registry.decorators import adapter
9
+ from hexdag_plugins.storage.ports import VectorStorePort
10
+
11
+
12
class VectorStoreConfig(AdapterConfig):
    """Configuration for in-memory vector store.

    Declares the settings that ``InMemoryVectorStore`` reads from
    ``self.config``.

    Attributes
    ----------
    embedding_dim : int
        Dimension of embedding vectors (default: 384 for sentence-transformers).
        The store's built-in hash-based embedding produces vectors of this
        length.
    max_results : int
        Maximum number of results to return from search (default: 5).
        Used by ``asearch`` when the caller does not pass ``top_k``.
    """

    embedding_dim: int = 384
    max_results: int = 5
25
+
26
+
27
@adapter("vector_store", name="in_memory_vector", namespace="plugin")
class InMemoryVectorStore(ConfigurableAdapter, VectorStorePort):
    """In-memory vector store for RAG operations.

    Keeps documents and their embeddings in two parallel lists and ranks
    search hits by cosine similarity. When no embeddings are supplied, a
    deterministic hash-based placeholder embedding is generated from the text.

    Examples
    --------
    Store and search documents::

        from hexdag.core.registry import registry

        vector_store = registry.get("in_memory_vector", namespace="plugin")

        # Add documents
        await vector_store.aadd_documents([
            {"text": "Python is a programming language", "id": "doc1"},
            {"text": "Machine learning uses algorithms", "id": "doc2"},
        ])

        # Search
        results = await vector_store.asearch("programming", top_k=2)
    """

    Config = VectorStoreConfig

    def __init__(self, **kwargs: Any) -> None:
        """Initialize vector store with no documents."""
        super().__init__(**kwargs)
        # Parallel lists: _embeddings[i] is the vector for _documents[i].
        self._documents: list[dict[str, Any]] = []
        self._embeddings: list[list[float]] = []

    async def aadd_documents(
        self,
        documents: list[dict[str, Any]],
        embeddings: list[list[float]] | None = None,
    ) -> dict[str, Any]:
        """Add documents to the vector store.

        Parameters
        ----------
        documents : list[dict[str, Any]]
            Documents to add (must have 'text' field when no embeddings are
            supplied)
        embeddings : list[list[float]] | None
            Pre-computed embeddings (if None, uses simple hash-based embedding)

        Returns
        -------
        dict[str, Any]
            ``{"added": <count added>, "total": <count now stored>}``

        Raises
        ------
        ValueError
            If the number of embeddings differs from the number of documents,
            or if an embedding's dimension differs from the vectors already
            stored (or from the other vectors in this batch).
        """
        if embeddings is None:
            # Generate simple embeddings if none provided
            embeddings = [self._simple_embedding(doc["text"]) for doc in documents]

        if len(documents) != len(embeddings):
            msg = "Number of documents must match number of embeddings"
            raise ValueError(msg)

        new_vectors = [list(vector) for vector in embeddings]
        if new_vectors:
            # A vector of a different length would make every later search
            # fail inside _cosine_similarity, so reject it before storing.
            expected_dim = len(self._embeddings[0]) if self._embeddings else len(new_vectors[0])
            if any(len(vector) != expected_dim for vector in new_vectors):
                msg = "Vectors must have same dimension"
                raise ValueError(msg)

        # Store shallow copies so later caller-side mutation of the input
        # dicts does not silently change what the store holds.
        self._documents.extend(doc.copy() for doc in documents)
        self._embeddings.extend(new_vectors)

        return {"added": len(documents), "total": len(self._documents)}

    async def asearch(
        self,
        query: str,
        query_embedding: list[float] | None = None,
        top_k: int | None = None,
        filter_metadata: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Search for similar documents.

        Parameters
        ----------
        query : str
            Query text
        query_embedding : list[float] | None
            Pre-computed query embedding (if None, generates from query text)
        top_k : int | None
            Number of results to return (uses config.max_results if None).
            Zero or a negative value yields an empty list.
        filter_metadata : dict[str, Any] | None
            Optional exact-match filter: only documents whose 'metadata' dict
            contains every given key with an equal value are considered.
            Operator-style filters are not interpreted.

        Returns
        -------
        list[dict[str, Any]]
            Copies of the top-k most similar documents, each with an added
            'similarity_score' key, best match first

        Raises
        ------
        ValueError
            If the query embedding's dimension differs from a stored vector's.
        """
        if not self._documents:
            return []

        limit = self.config.max_results if top_k is None else top_k
        # A negative limit would otherwise slice from the end of the ranking.
        if limit <= 0:
            return []

        if query_embedding is None:
            query_embedding = self._simple_embedding(query)

        # Calculate similarities for the documents that pass the filter
        ranked = [
            (index, self._cosine_similarity(query_embedding, vector))
            for index, (document, vector) in enumerate(
                zip(self._documents, self._embeddings, strict=True)
            )
            if self._matches_filter(document, filter_metadata)
        ]

        # Stable sort: ties keep insertion order
        ranked.sort(key=lambda pair: pair[1], reverse=True)

        results = []
        for index, score in ranked[:limit]:
            hit = self._documents[index].copy()
            hit["similarity_score"] = score
            results.append(hit)

        return results

    @staticmethod
    def _matches_filter(
        document: dict[str, Any],
        filter_metadata: dict[str, Any] | None,
    ) -> bool:
        """Return True if the document's metadata satisfies the filter."""
        if not filter_metadata:
            return True
        metadata = document.get("metadata")
        if not isinstance(metadata, dict):
            return False
        return all(
            key in metadata and metadata[key] == expected
            for key, expected in filter_metadata.items()
        )

    async def aclear(self) -> dict[str, Any]:
        """Clear all documents from the store.

        Returns
        -------
        dict[str, Any]
            ``{"removed": <count removed>}``
        """
        count = len(self._documents)
        self._documents.clear()
        self._embeddings.clear()
        return {"removed": count}

    async def adelete(self, ids: list[str]) -> dict[str, Any]:
        """Delete documents by ID.

        Parameters
        ----------
        ids : list[str]
            List of document IDs to delete; matched against each document's
            'id' field

        Returns
        -------
        dict[str, Any]
            ``{"deleted": <count deleted>}``
        """
        doomed = [
            index for index, document in enumerate(self._documents) if document.get("id") in ids
        ]

        # Remove from the back so earlier indices stay valid
        for index in reversed(doomed):
            del self._documents[index]
            del self._embeddings[index]

        return {"deleted": len(doomed)}

    async def acount(self) -> int:
        """Get the number of documents in the vector store.

        Returns
        -------
        int
            Number of documents currently stored
        """
        return len(self._documents)

    async def aget_stats(self) -> dict[str, Any]:
        """Get vector store statistics.

        Returns
        -------
        dict[str, Any]
            Same mapping as ``get_stats``
        """
        return self.get_stats()

    def get_stats(self) -> dict[str, Any]:
        """Get vector store statistics (sync version for backwards compatibility).

        Returns
        -------
        dict[str, Any]
            'document_count', 'embedding_dim' and 'max_results'
        """
        return {
            "document_count": len(self._documents),
            "embedding_dim": self.config.embedding_dim,
            "max_results": self.config.max_results,
        }

    def _simple_embedding(self, text: str) -> list[float]:
        """Generate a simple hash-based embedding.

        This is a placeholder for production embedding models like
        sentence-transformers or OpenAI embeddings. The same text always
        yields the same vector.

        Parameters
        ----------
        text : str
            Text to embed (lower-cased and stripped before hashing)

        Returns
        -------
        list[float]
            Vector of ``config.embedding_dim`` values
        """
        dim = self.config.embedding_dim
        normalized = text.lower().strip()

        vector = []
        for position in range(dim):
            # One digest per dimension, seeded with the position
            seed = f"{normalized}_{position}"
            # MD5 is only a cheap deterministic mixer here, not a security
            # primitive; flag it so restricted builds do not refuse it.
            digest = hashlib.md5(seed.encode(), usedforsecurity=False).hexdigest()
            # Fold into 1000 buckets, then scale to start at -1
            vector.append((int(digest, 16) % 1000) / 500 - 1)

        return vector

    def _cosine_similarity(self, vec1: list[float], vec2: list[float]) -> float:
        """Calculate cosine similarity between two vectors.

        Parameters
        ----------
        vec1 : list[float]
            First vector
        vec2 : list[float]
            Second vector

        Returns
        -------
        float
            Cosine similarity score [-1, 1]; 0.0 if either vector has zero
            magnitude

        Raises
        ------
        ValueError
            If the vectors differ in length
        """
        if len(vec1) != len(vec2):
            msg = "Vectors must have same dimension"
            raise ValueError(msg)

        dot_product = sum(a * b for a, b in zip(vec1, vec2, strict=True))
        mag1 = math.sqrt(sum(a * a for a in vec1))
        mag2 = math.sqrt(sum(b * b for b in vec2))

        # Avoid dividing by zero for all-zero (or empty) vectors
        if mag1 == 0 or mag2 == 0:
            return 0.0

        return dot_product / (mag1 * mag2)

    def __repr__(self) -> str:
        """String representation."""
        return f"InMemoryVectorStore(documents={len(self._documents)})"