hexdag 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
"""Local filesystem storage adapter."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import shutil
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from hexdag.core.configurable import AdapterConfig, ConfigurableAdapter
|
|
10
|
+
from hexdag.core.ports.file_storage import FileStoragePort
|
|
11
|
+
from hexdag.core.ports.healthcheck import HealthStatus
|
|
12
|
+
from hexdag.core.registry.decorators import adapter
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LocalFileStorageConfig(AdapterConfig):
    """Settings consumed by the local filesystem storage adapter.

    Attributes
    ----------
    base_path : str
        Directory under which every stored file is kept
        (default: "./storage").
    create_if_missing : bool
        Whether the adapter should create ``base_path`` when it is absent
        (default: True).
    """

    base_path: str = "./storage"
    create_if_missing: bool = True
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@adapter("file_storage", name="local", namespace="storage")
class LocalFileStorage(ConfigurableAdapter, FileStoragePort):
    """Local filesystem storage adapter.

    Provides file storage on the local filesystem with a base directory.
    Every ``remote_path`` / ``prefix`` argument is interpreted relative to
    that base directory; values that would resolve outside of it (absolute
    paths, ``..`` segments, symlinks leaving the tree) are rejected with
    ``ValueError``.

    The methods are coroutines, but the underlying filesystem calls
    (``shutil.copy2``, ``Path.unlink``, ``Path.glob`` ...) are synchronous.

    Examples
    --------
    Basic usage::

        from hexdag.core.registry import registry

        storage = registry.get("local", namespace="storage")(
            base_path="./data"
        )

        # Upload file
        await storage.aupload("document.pdf", "docs/document.pdf")

        # List files
        files = await storage.alist(prefix="docs/")

        # Download file
        await storage.adownload("docs/document.pdf", "/tmp/document.pdf")

        # Check if exists
        exists = await storage.aexists("docs/document.pdf")

        # Delete file
        await storage.adelete("docs/document.pdf")
    """

    Config = LocalFileStorageConfig

    def __init__(self, **kwargs: Any) -> None:
        """Initialize local file storage.

        Keyword arguments are forwarded to the base adapter, which exposes
        them as ``self.config``. The base directory is created up front
        when ``create_if_missing`` is enabled.
        """
        super().__init__(**kwargs)
        self._base_path = Path(self.config.base_path)

        if self.config.create_if_missing:
            self._base_path.mkdir(parents=True, exist_ok=True)

    def _storage_path(self, remote_path: str) -> Path:
        """Map ``remote_path`` onto the base directory, refusing escapes.

        Parameters
        ----------
        remote_path : str
            Path relative to base_path

        Returns
        -------
        Path
            ``base_path / remote_path`` (not resolved, so reported paths
            keep the same form the caller configured)

        Raises
        ------
        ValueError
            If the path resolves to a location outside base_path
        """
        root = self._base_path.resolve()
        target = self._base_path / remote_path
        resolved = target.resolve()

        # Containment is checked on the resolved form so that "..",
        # absolute paths and symlinks cannot leave the storage tree.
        if resolved != root and root not in resolved.parents:
            msg = f"Path escapes storage base directory: {remote_path}"
            raise ValueError(msg)

        return target

    async def aupload(self, local_path: str, remote_path: str) -> dict:
        """Upload file to storage directory.

        Parameters
        ----------
        local_path : str
            Path to local file
        remote_path : str
            Destination path relative to base_path

        Returns
        -------
        dict
            Upload result with keys ``uploaded``, ``local_path``,
            ``remote_path``, ``storage_path`` and ``size_bytes``

        Raises
        ------
        FileNotFoundError
            If ``local_path`` does not exist
        ValueError
            If ``remote_path`` points outside base_path
        """
        src = Path(local_path)
        dest = self._storage_path(remote_path)

        if not src.exists():
            msg = f"Source file not found: {local_path}"
            raise FileNotFoundError(msg)

        # Create destination directory
        dest.parent.mkdir(parents=True, exist_ok=True)

        # Copy file (copy2 also carries over file metadata)
        shutil.copy2(src, dest)

        return {
            "uploaded": True,
            "local_path": str(src),
            "remote_path": remote_path,
            "storage_path": str(dest),
            "size_bytes": dest.stat().st_size,
        }

    async def adownload(self, remote_path: str, local_path: str) -> dict:
        """Download file from storage directory.

        Parameters
        ----------
        remote_path : str
            Path in storage relative to base_path
        local_path : str
            Destination local path

        Returns
        -------
        dict
            Download result with keys ``downloaded``, ``remote_path``,
            ``local_path`` and ``size_bytes``

        Raises
        ------
        FileNotFoundError
            If ``remote_path`` does not exist in storage
        ValueError
            If ``remote_path`` points outside base_path
        """
        src = self._storage_path(remote_path)
        dest = Path(local_path)

        if not src.exists():
            msg = f"File not found in storage: {remote_path}"
            raise FileNotFoundError(msg)

        # Create destination directory
        dest.parent.mkdir(parents=True, exist_ok=True)

        # Copy file
        shutil.copy2(src, dest)

        return {
            "downloaded": True,
            "remote_path": remote_path,
            "local_path": str(dest),
            "size_bytes": dest.stat().st_size,
        }

    async def adelete(self, remote_path: str) -> dict:
        """Delete file from storage.

        After the file is removed, its immediate parent directory is
        removed as well when that leaves it empty. The base directory
        itself is never removed.

        Parameters
        ----------
        remote_path : str
            Path to file relative to base_path

        Returns
        -------
        dict
            Deletion result with keys ``deleted`` and ``remote_path``

        Raises
        ------
        FileNotFoundError
            If ``remote_path`` does not exist in storage
        ValueError
            If ``remote_path`` points outside base_path
        """
        file_path = self._storage_path(remote_path)

        if not file_path.exists():
            msg = f"File not found: {remote_path}"
            raise FileNotFoundError(msg)

        # Delete file
        file_path.unlink()

        # Best-effort cleanup of the now possibly empty parent directory.
        # rmdir() raises a plain OSError when the directory still has
        # entries, so OSError (not just FileNotFoundError) is suppressed;
        # otherwise deleting one of several sibling files would fail
        # after the file was already gone.
        parent = file_path.parent
        if parent.resolve() != self._base_path.resolve():
            with contextlib.suppress(OSError):
                parent.rmdir()

        return {
            "deleted": True,
            "remote_path": remote_path,
        }

    async def alist(self, prefix: str = "") -> list[str]:
        """List files with optional prefix.

        The prefix is treated as a directory below base_path and is
        searched recursively; an empty prefix lists the whole store.

        Parameters
        ----------
        prefix : str
            Optional path prefix for filtering

        Returns
        -------
        list[str]
            Sorted list of file paths relative to base_path (empty when
            the prefix does not exist)

        Raises
        ------
        ValueError
            If ``prefix`` points outside base_path
        """
        search_path = self._storage_path(prefix) if prefix else self._base_path

        if not search_path.exists():
            return []

        return sorted(
            str(path.relative_to(self._base_path))
            for path in search_path.glob("**/*")
            if path.is_file()
        )

    async def aexists(self, remote_path: str) -> bool:
        """Check if file exists.

        Parameters
        ----------
        remote_path : str
            Path to check relative to base_path

        Returns
        -------
        bool
            True if the path exists and is a regular file

        Raises
        ------
        ValueError
            If ``remote_path`` points outside base_path
        """
        file_path = self._storage_path(remote_path)
        return file_path.exists() and file_path.is_file()

    async def aget_metadata(self, remote_path: str) -> dict:
        """Get file metadata.

        Parameters
        ----------
        remote_path : str
            Path to file relative to base_path

        Returns
        -------
        dict
            Metadata with keys ``path``, ``size_bytes``, ``modified_time``,
            ``created_time``, ``is_file`` and ``absolute_path``

        Raises
        ------
        FileNotFoundError
            If ``remote_path`` does not exist in storage
        ValueError
            If ``remote_path`` points outside base_path
        """
        file_path = self._storage_path(remote_path)

        if not file_path.exists():
            msg = f"File not found: {remote_path}"
            raise FileNotFoundError(msg)

        stat = file_path.stat()

        return {
            "path": remote_path,
            "size_bytes": stat.st_size,
            "modified_time": stat.st_mtime,
            # NOTE: st_ctime is the creation time on Windows only; on Unix
            # it is the time of the last metadata change.
            "created_time": stat.st_ctime,
            "is_file": file_path.is_file(),
            "absolute_path": str(file_path.absolute()),
        }

    async def ahealth_check(self) -> HealthStatus:
        """Check storage health.

        Verifies that the base directory exists, that a probe file can be
        written and removed inside it, and counts the stored files. Any
        failure is reported as an "unhealthy" status instead of raising.

        Returns
        -------
        HealthStatus
            Health status of the local storage
        """
        # perf_counter is monotonic, so the latency cannot be skewed by
        # wall-clock adjustments.
        started = time.perf_counter()

        def _report(status: str, details: dict, error: Any = None) -> HealthStatus:
            """Build a HealthStatus with the fields shared by all outcomes."""
            fields: dict = {
                "status": status,
                "adapter_name": "local_file_storage",
                "port_name": "file_storage",
                "latency_ms": (time.perf_counter() - started) * 1000,
                "details": details,
            }
            if error is not None:
                fields["error"] = error
            return HealthStatus(**fields)

        try:
            # Check if base path exists and is writable
            if not self._base_path.exists():
                return _report(
                    "unhealthy",
                    {
                        "base_path": str(self._base_path),
                        "error": "Base path does not exist",
                    },
                )

            # Test write access
            test_file = self._base_path / ".health_check"
            try:
                test_file.write_text("health_check")
                test_file.unlink()
            except Exception as e:
                return _report(
                    "unhealthy",
                    {
                        "base_path": str(self._base_path),
                        "error": "Cannot write to base path",
                    },
                    error=e,
                )

            # Count files
            file_count = len(await self.alist())

            return _report(
                "healthy",
                {
                    "base_path": str(self._base_path.absolute()),
                    "file_count": file_count,
                    "writable": True,
                },
            )

        except Exception as e:
            # A health probe reports failures rather than propagating them.
            return _report(
                "unhealthy",
                {
                    "base_path": str(self._base_path),
                },
                error=e,
            )

    def __repr__(self) -> str:
        """String representation."""
        return f"LocalFileStorage(base_path={self._base_path})"
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""Vector store port interface for RAG operations."""
|
|
2
|
+
|
|
3
|
+
from abc import abstractmethod
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
|
5
|
+
|
|
6
|
+
from hexdag.core.registry.decorators import port
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from hexdag.core.ports.healthcheck import HealthStatus
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@port(name="vector_store", namespace="plugin")
@runtime_checkable
class VectorStorePort(Protocol):
    """Contract that every vector store adapter fulfils.

    A vector store keeps text documents together with their embeddings and
    retrieves them by similarity, which is what gives a pipeline semantic
    search. The port hides which backend does the work (in-memory,
    PostgreSQL pgvector, ChromaDB, etc.).

    Required capabilities:
    - Storing documents with embeddings (``aadd_documents``)
    - Similarity search (``asearch``)
    - Document management (``acount``, ``aclear``)

    Optional Methods
    ----------------
    Adapters may additionally provide:
    - adelete(ids): Delete documents by ID
    - aget_stats(): Get storage statistics
    - asetup(): Initialize database/connections
    - aclose(): Clean up resources
    - ahealth_check(): Verify adapter health and connectivity

    Notes
    -----
    The optional methods are declared in the protocol body, so they are
    protocol members too. NOTE(review): a runtime ``isinstance`` check
    against a ``runtime_checkable`` protocol looks for every member, so an
    adapter lacking them would presumably not pass such a check - confirm
    against how the registry validates adapters.
    """

    @abstractmethod
    async def aadd_documents(
        self,
        documents: list[dict[str, Any]],
        embeddings: list[list[float]] | None = None,
    ) -> None | dict[str, Any]:
        """Store documents in the vector store.

        Parameters
        ----------
        documents : list[dict[str, Any]]
            Documents to store; each needs a 'text' field and may carry
            'id' and 'metadata'
        embeddings : list[list[float]] | None
            Pre-computed embeddings; when None the adapter may compute
            them itself

        Returns
        -------
        None | dict[str, Any]
            Optionally a dict describing the operation
            (e.g., {'added': 5, 'ids': [...]})

        Examples
        --------
        Basic usage::

            await vector_store.aadd_documents([
                {"text": "Python programming", "id": "doc1"},
                {"text": "Machine learning", "id": "doc2"},
            ])

        With embeddings::

            embeddings = await embedder.aembed_texts(["text1", "text2"])
            await vector_store.aadd_documents(documents, embeddings=embeddings)
        """

    @abstractmethod
    async def asearch(
        self,
        query: str,
        query_embedding: list[float] | None = None,
        top_k: int | None = None,
        filter_metadata: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Find the documents most similar to a query.

        Parameters
        ----------
        query : str
            Search query text
        query_embedding : list[float] | None
            Pre-computed embedding of the query, if available
        top_k : int | None
            How many results to return (the adapter may apply a default)
        filter_metadata : dict[str, Any] | None
            Metadata constraints (e.g., {"category": "docs"})

        Returns
        -------
        list[dict[str, Any]]
            Matches carrying 'id', 'text', 'score' and optionally
            'metadata'. Scores should be similarity scores
            (higher = more similar)

        Examples
        --------
        Basic search::

            results = await vector_store.asearch("Python tutorial", top_k=5)
            for doc in results:
                print(f"{doc['text']} (score: {doc['score']:.3f})")

        With metadata filter::

            results = await vector_store.asearch(
                "tutorial",
                filter_metadata={"category": "programming"}
            )
        """

    @abstractmethod
    async def aclear(self) -> None | dict[str, Any]:
        """Remove every document from the vector store.

        Returns
        -------
        None | dict[str, Any]
            Optionally a dict describing the operation
            (e.g., {'removed': 100})

        Examples
        --------
        ::

            await vector_store.aclear()
            count = await vector_store.acount()
            assert count == 0
        """

    @abstractmethod
    async def acount(self) -> int:
        """Report how many documents the vector store holds.

        Returns
        -------
        int
            Number of documents currently stored

        Examples
        --------
        ::

            count = await vector_store.acount()
            print(f"Store contains {count} documents")
        """

    # Optional methods for enhanced functionality

    async def adelete(self, ids: list[str]) -> None | dict[str, Any]:
        """Remove documents by ID (optional).

        Parameters
        ----------
        ids : list[str]
            IDs of the documents to delete

        Returns
        -------
        None | dict[str, Any]
            Optionally a dict describing the operation
            (e.g., {'deleted': 3})

        Examples
        --------
        ::

            await vector_store.adelete(["doc1", "doc2"])
        """

    async def aget_stats(self) -> dict[str, Any]:
        """Report statistics about the vector store (optional).

        Returns
        -------
        dict[str, Any]
            Statistics such as document count, storage size, etc.

        Examples
        --------
        ::

            stats = await vector_store.aget_stats()
            print(f"Documents: {stats['document_count']}")
            print(f"Size: {stats.get('storage_size_mb', 'N/A')} MB")
        """

    async def asetup(self) -> None:
        """Prepare the vector store for use (optional).

        Intended for one-time setup work such as:
        - Creating database tables/indexes
        - Establishing connections
        - Loading models

        Examples
        --------
        ::

            pgvector = PgVectorAdapter(connection_string="...")
            await pgvector.asetup()  # Creates tables and indexes
        """

    async def aclose(self) -> None:
        """Release connections and other resources (optional).

        Examples
        --------
        ::

            await vector_store.aclose()
        """

    async def ahealth_check(self) -> "HealthStatus":
        """Probe the vector store's health and connectivity (optional).

        Adapters should verify:
        - Storage backend connectivity (database, file system, etc.)
        - Read/write operations
        - Index health and performance
        - Storage capacity/availability

        This method is optional. If not implemented, the adapter will be
        considered healthy by default.

        Returns
        -------
        HealthStatus
            Current health status with details about storage backend

        Examples
        --------
        ::

            status = await vector_store.ahealth_check()
            if status.status == "healthy":
                print(f"Store is healthy (latency: {status.latency_ms}ms)")
            else:
                print(f"Store is {status.status}: {status.error}")
        """
|