hexdag 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""Port interface definitions for Embedding generation.
|
|
2
|
+
|
|
3
|
+
.. deprecated:: 0.5.0
|
|
4
|
+
The standalone ``embedding`` port is deprecated in favor of the unified ``llm`` port
|
|
5
|
+
with ``SupportsEmbedding`` protocol. Use ``LLM`` port and implement ``SupportsEmbedding``
|
|
6
|
+
instead. This port will be removed in version 1.0.0.
|
|
7
|
+
|
|
8
|
+
Migration guide:
|
|
9
|
+
Old (deprecated)::
|
|
10
|
+
|
|
11
|
+
from hexdag.core.ports.embedding import Embedding
|
|
12
|
+
|
|
13
|
+
@adapter("embedding", name="my_embedder")
|
|
14
|
+
class MyEmbeddingAdapter(Embedding):
|
|
15
|
+
async def aembed(self, text: str) -> list[float]:
|
|
16
|
+
...
|
|
17
|
+
|
|
18
|
+
New (recommended)::
|
|
19
|
+
|
|
20
|
+
from hexdag.core.ports.llm import LLM, SupportsEmbedding
|
|
21
|
+
|
|
22
|
+
@adapter("llm", name="my_embedder")
|
|
23
|
+
class MyEmbeddingAdapter(LLM, SupportsEmbedding):
|
|
24
|
+
async def aresponse(self, messages):
|
|
25
|
+
# For pure embedding adapters, can raise NotImplementedError
|
|
26
|
+
raise NotImplementedError("This is an embedding-only adapter")
|
|
27
|
+
|
|
28
|
+
async def aembed(self, text: str) -> list[float]:
|
|
29
|
+
...
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
import warnings
|
|
33
|
+
from abc import abstractmethod
|
|
34
|
+
from typing import TYPE_CHECKING, Protocol, runtime_checkable
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from hexdag.core.ports.healthcheck import HealthStatus
|
|
38
|
+
|
|
39
|
+
type ImageInput = str | bytes
|
|
40
|
+
|
|
41
|
+
# Emit the deprecation notice at import time. The removal version must match
# the module docstring: deprecated in 0.5.0, scheduled for removal in 1.0.0.
# ``stacklevel=2`` is kept so the warning is attributed above this module's
# own frame (i.e. towards whoever imported it).
warnings.warn(
    "The 'embedding' port is deprecated and will be removed in version 1.0.0. "
    "Use 'llm' port with 'SupportsEmbedding' protocol instead. "
    "See hexdag.core.ports.llm.SupportsEmbedding for details.",
    DeprecationWarning,
    stacklevel=2,
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@runtime_checkable
class Embedding(Protocol):
    """Structural port for adapters that produce embedding vectors.

    .. deprecated:: 0.5.0
        Use :class:`hexdag.core.ports.llm.LLM` with
        :class:`hexdag.core.ports.llm.SupportsEmbedding` protocol instead.

    An embedding is a vector representation of a text or an image that
    captures its semantic meaning. The backend is up to the implementation
    (OpenAI, local models, etc.); the port only fixes the method names and
    signatures below.

    Required Methods
    ----------------
    - aembed(text: str): Generate embedding for text
    - aembed_image(image: ImageInput): Generate embedding for image

    Optional Methods
    ----------------
    Adapters may optionally implement:
    - ahealth_check(): Verify embedding API connectivity and availability
    - aembed_batch(): Batch embedding generation for efficiency
    - aembed_image_batch(): Batch image embedding generation for efficiency
    """

    @abstractmethod
    async def aembed(self, text: str) -> list[float]:
        """Embed one piece of text (async).

        Args
        ----
            text: The string to turn into a vector

        Returns
        -------
            The embedding vector as a list of floats

        Examples
        --------
        Embedding a single string::

            embedding = await adapter.aembed("Hello, world!")
            # Returns: [0.123, -0.456, 0.789, ...]
        """
        ...

    @abstractmethod
    async def aembed_image(self, image: ImageInput) -> list[float]:
        """Embed one image (async).

        Args
        ----
            image: The image to turn into a vector, given as one of:
                - str: File path to image or base64-encoded image data
                - bytes: Raw image bytes

        Returns
        -------
            The embedding vector as a list of floats

        Examples
        --------
        From a file path::

            embedding = await adapter.aembed_image("/path/to/image.jpg")
            # Returns: [0.123, -0.456, 0.789, ...]

        From raw bytes::

            with open("image.jpg", "rb") as f:
                image_bytes = f.read()
            embedding = await adapter.aembed_image(image_bytes)
            # Returns: [0.123, -0.456, 0.789, ...]

        From base64 data::

            base64_image = "..."
            embedding = await adapter.aembed_image(base64_image)
            # Returns: [0.123, -0.456, 0.789, ...]
        """
        ...

    async def aembed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed several texts in one call (optional).

        Batch processing exists for performance when many texts need
        embedding. If an adapter does not implement it, the framework will
        fall back to sequential calls to aembed().

        Args
        ----
            texts: The strings to embed

        Returns
        -------
            One embedding vector per input text

        Examples
        --------
        Embedding a batch::

            texts = ["Hello", "World", "AI"]
            embeddings = await adapter.aembed_batch(texts)
            # Returns: [[0.1, 0.2, ...], [0.3, 0.4, ...], [0.5, 0.6, ...]]
        """
        ...

    async def aembed_image_batch(self, images: list[ImageInput]) -> list[list[float]]:
        """Embed several images in one call (optional).

        Batch processing exists for performance when many images need
        embedding. If an adapter does not implement it, the framework will
        fall back to sequential calls to aembed_image().

        Args
        ----
            images: The images to embed; each element is one of:
                - str: File path to image or base64-encoded image data
                - bytes: Raw image bytes

        Returns
        -------
            One embedding vector per input image

        Examples
        --------
        Embedding a batch of images::

            images = [
                "/path/to/image1.jpg",
                "/path/to/image2.png",
                image_bytes
            ]
            embeddings = await adapter.aembed_image_batch(images)
            # Returns: [[0.1, 0.2, ...], [0.3, 0.4, ...], [0.5, 0.6, ...]]
        """
        ...

    async def ahealth_check(self) -> "HealthStatus":
        """Report adapter health and connectivity (optional).

        An implementation should verify:
        - API connectivity to the embedding service
        - Model availability
        - Authentication status
        - Rate limit status (if applicable)

        The method is optional. An adapter that does not implement it will
        be considered healthy by default.

        Returns
        -------
        HealthStatus
            Current health status with details about connectivity and availability

        Examples
        --------
        Health check on an OpenAI adapter::

            status = await openai_adapter.ahealth_check()
            status.status  # "healthy", "degraded", or "unhealthy"
            status.latency_ms  # Time taken for health check
            status.details  # {"model": "text-embedding-3-small"}
        """
        ...
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Port interface for DAG execution strategies.
|
|
2
|
+
|
|
3
|
+
This port enables pluggable execution backends for the orchestrator,
|
|
4
|
+
allowing the same DAG to run locally (in-process), with Celery
|
|
5
|
+
(distributed task queue), or Azure Functions (serverless).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import abstractmethod
|
|
9
|
+
from typing import Any, Protocol, runtime_checkable
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
12
|
+
|
|
13
|
+
from hexdag.core.orchestration.hook_context import PipelineStatus
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ExecutionTask(BaseModel):
    """Serializable description of a single node execution request.

    Everything an executor needs to run one node is bundled here so the
    request can be handed across process boundaries for distributed
    execution.

    Attributes
    ----------
    node_name : str
        Unique identifier for the node within the DAG
    node_input : Any
        Input data for the node (None = executor should prepare from context)
    wave_index : int
        Wave number in the DAG execution sequence
    should_validate : bool
        Whether to perform input/output validation. On input this field is
        declared with the validation alias ``validate``.
    context_data : dict[str, Any]
        Execution context metadata (dag_id, run_id, attempt, etc.)
    params : dict[str, Any]
        Additional parameters passed to the node function
    """

    model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)

    node_name: str
    node_input: Any | None = None
    wave_index: int = 0
    # Declared with validation alias "validate"; ``populate_by_name`` is enabled
    # in the model config above.
    should_validate: bool = Field(validation_alias="validate", default=True)
    context_data: dict[str, Any] = Field(default_factory=dict)
    params: dict[str, Any] = Field(default_factory=dict)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class ExecutionResult(BaseModel):
    """Outcome of running one node.

    Attributes
    ----------
    node_name : str
        Name of the executed node
    output : Any
        Result data from the node execution
    duration_ms : float
        Execution time in milliseconds
    status : PipelineStatus
        Execution status (SUCCESS, FAILED, CANCELLED); defaults to SUCCESS
    error : str | None
        Error message if execution failed, None otherwise
    error_type : str | None
        Type of error that occurred, if any
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Identity and payload of the run.
    node_name: str
    output: Any = None
    duration_ms: float = 0.0

    # Outcome reporting; both error fields stay None unless set by the executor.
    status: PipelineStatus = PipelineStatus.SUCCESS
    error: str | None = None
    error_type: str | None = None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@runtime_checkable
class ExecutorPort(Protocol):
    """Structural port for backends that execute DAG nodes.

    The orchestrator delegates node execution through this port, so the
    backend can be swapped without changing the caller:

    - **LocalExecutor**: In-process async execution (default)
    - **CeleryExecutor**: Distributed execution via Celery task queue
    - **AzureFunctionsExecutor**: Serverless execution via Azure Functions

    The interface is the same regardless of where or how nodes actually run.

    Lifecycle
    ---------
    Executors may implement optional setup/cleanup methods:
    - asetup(): Initialize resources (connections, workers, etc.)
    - aclose(): Cleanup resources (called automatically by orchestrator)

    Optional Methods
    ----------------
    Adapters may optionally implement:
    - asetup(): Initialize executor resources before first use
    - aclose(): Cleanup executor resources after execution completes
    """

    @abstractmethod
    async def aexecute_node(self, task: ExecutionTask) -> ExecutionResult:
        """Run one node and report its result.

        An executor is expected to:
        1. Resolve the node function (by name or from task data)
        2. Execute the function with the provided input
        3. Handle timeouts and errors
        4. Wrap the result in ExecutionResult

        Parameters
        ----------
        task : ExecutionTask
            Task definition containing all execution parameters

        Returns
        -------
        ExecutionResult
            Execution result with output or error information

        Examples
        --------
        Running locally::

            task = ExecutionTask(
                node_name="fetch_data",
                node_input={"url": "https://api.example.com"},
                wave_index=0,
                validate=True
            )
            result = await executor.aexecute_node(task)
            if result.status == PipelineStatus.SUCCESS:
                print(result.output)

        Running distributed (same interface)::

            # Task is serialized and sent to remote worker
            result = await celery_executor.aexecute_node(task)
        """
        ...

    @abstractmethod
    async def aexecute_wave(self, tasks: list[ExecutionTask]) -> dict[str, ExecutionResult]:
        """Run all nodes of one wave concurrently.

        The executor should run every task in parallel (respecting
        concurrency limits) and hand the results back as a dictionary.

        Parameters
        ----------
        tasks : list[ExecutionTask]
            List of tasks to execute in parallel

        Returns
        -------
        dict[str, ExecutionResult]
            Map of node_name -> execution result

        Notes
        -----
        If any task fails, the exception should be captured in
        ExecutionResult.error rather than raised directly.

        Examples
        --------
        Running a wave of independent nodes::

            tasks = [
                ExecutionTask(node_name="fetch_a", ...),
                ExecutionTask(node_name="fetch_b", ...),
                ExecutionTask(node_name="fetch_c", ...),
            ]
            results = await executor.aexecute_wave(tasks)
            # All three nodes executed concurrently
            assert "fetch_a" in results
            assert "fetch_b" in results
            assert "fetch_c" in results
        """
        ...

    async def asetup(self) -> None:
        """Prepare executor resources before first use (optional).

        Typical work for an implementation:
        - Establish connections to brokers/queues
        - Initialize worker pools
        - Verify remote endpoints are accessible
        - Load configuration

        The orchestrator calls this automatically during the managed_ports
        lifecycle.

        Examples
        --------
        Setup for a Celery executor::

            async def asetup(self):
                self.celery_app = Celery('hexdag', broker=self.broker_url)
                await self._verify_broker_connection()

        Setup for an Azure Functions executor::

            async def asetup(self):
                self.http_client = aiohttp.ClientSession()
                await self._verify_function_endpoint()
        """
        ...

    async def aclose(self) -> None:
        """Release executor resources once execution completes (optional).

        Typical work for an implementation:
        - Close connections to brokers/queues
        - Shutdown worker pools
        - Close HTTP clients
        - Flush any pending tasks

        The orchestrator calls this automatically during its cleanup phase.

        Examples
        --------
        Cleanup for a Celery executor::

            async def aclose(self):
                await self.celery_app.close()

        Cleanup for an Azure Functions executor::

            async def aclose(self):
                await self.http_client.close()
        """
        ...
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""File storage port definition."""
|
|
2
|
+
|
|
3
|
+
from typing import Protocol, runtime_checkable
|
|
4
|
+
|
|
5
|
+
from hexdag.core.ports.healthcheck import HealthStatus
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@runtime_checkable
class FileStoragePort(Protocol):
    """Structural port for file storage backends.

    Gives local and cloud file storage one shared interface.
    """

    async def aupload(self, local_path: str, remote_path: str) -> dict:
        """Copy a local file into storage.

        Parameters
        ----------
        local_path : str
            Path to local file
        remote_path : str
            Destination path in storage

        Returns
        -------
        dict
            Upload result with metadata
        """
        ...

    async def adownload(self, remote_path: str, local_path: str) -> dict:
        """Copy a stored file to a local path.

        Parameters
        ----------
        remote_path : str
            Path in storage
        local_path : str
            Destination local path

        Returns
        -------
        dict
            Download result with metadata
        """
        ...

    async def adelete(self, remote_path: str) -> dict:
        """Remove a file from storage.

        Parameters
        ----------
        remote_path : str
            Path to file in storage

        Returns
        -------
        dict
            Deletion result
        """
        ...

    async def alist(self, prefix: str = "") -> list[str]:
        """Enumerate stored files, optionally filtered by prefix.

        Parameters
        ----------
        prefix : str
            Optional path prefix for filtering

        Returns
        -------
        list[str]
            List of file paths
        """
        ...

    async def aexists(self, remote_path: str) -> bool:
        """Tell whether a file is present in storage.

        Parameters
        ----------
        remote_path : str
            Path to check

        Returns
        -------
        bool
            True if file exists
        """
        ...

    async def aget_metadata(self, remote_path: str) -> dict:
        """Fetch metadata for a stored file.

        Parameters
        ----------
        remote_path : str
            Path to file

        Returns
        -------
        dict
            File metadata (size, modified_time, etc.)
        """
        ...

    async def ahealth_check(self) -> HealthStatus:
        """Report the health of the storage backend.

        Returns
        -------
        HealthStatus
            Health status of the storage system
        """
        ...
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Health check models and types for adapter monitoring.
|
|
2
|
+
|
|
3
|
+
This module provides shared types for adapter health checking.
|
|
4
|
+
Each port can optionally implement an ahealth_check() method that returns HealthStatus.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any, Literal
|
|
11
|
+
|
|
12
|
+
__all__ = ["HealthStatus"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(slots=True)
|
|
16
|
+
class HealthStatus:
|
|
17
|
+
"""Health check result returned by adapters.
|
|
18
|
+
|
|
19
|
+
Adapters can implement an optional async ahealth_check() method
|
|
20
|
+
that returns this status object to indicate their operational state.
|
|
21
|
+
|
|
22
|
+
Attributes
|
|
23
|
+
----------
|
|
24
|
+
status : Literal["healthy", "degraded", "unhealthy"]
|
|
25
|
+
Current health status:
|
|
26
|
+
- "healthy": Adapter is fully operational
|
|
27
|
+
- "degraded": Adapter is operational but with reduced performance
|
|
28
|
+
- "unhealthy": Adapter is not operational
|
|
29
|
+
adapter_name : str
|
|
30
|
+
Name of the adapter being checked
|
|
31
|
+
port_name : str | None
|
|
32
|
+
Name of the port this adapter implements (e.g., "llm", "database")
|
|
33
|
+
details : dict[str, Any]
|
|
34
|
+
Additional context about the health check (e.g., error messages, metrics)
|
|
35
|
+
latency_ms : float | None
|
|
36
|
+
Time taken to perform the health check in milliseconds
|
|
37
|
+
error : Exception | None
|
|
38
|
+
Exception if the health check failed
|
|
39
|
+
|
|
40
|
+
Examples
|
|
41
|
+
--------
|
|
42
|
+
Example usage::
|
|
43
|
+
|
|
44
|
+
# Healthy adapter
|
|
45
|
+
HealthStatus(
|
|
46
|
+
status="healthy",
|
|
47
|
+
adapter_name="openai",
|
|
48
|
+
port_name="llm",
|
|
49
|
+
latency_ms=45.2
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
# Unhealthy adapter with error
|
|
53
|
+
HealthStatus(
|
|
54
|
+
status="unhealthy",
|
|
55
|
+
adapter_name="postgres",
|
|
56
|
+
port_name="database",
|
|
57
|
+
error=ConnectionError("Could not connect to database"),
|
|
58
|
+
details={"host": "localhost", "port": 5432}
|
|
59
|
+
)
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
status: Literal["healthy", "degraded", "unhealthy"]
|
|
63
|
+
adapter_name: str
|
|
64
|
+
port_name: str | None = None
|
|
65
|
+
details: dict[str, Any] = field(default_factory=dict)
|
|
66
|
+
latency_ms: float | None = None
|
|
67
|
+
error: Exception | None = None
|
|
68
|
+
|
|
69
|
+
def is_healthy(self) -> bool:
|
|
70
|
+
"""Check if status is healthy.
|
|
71
|
+
|
|
72
|
+
Returns
|
|
73
|
+
-------
|
|
74
|
+
bool
|
|
75
|
+
True if status is "healthy", False otherwise
|
|
76
|
+
"""
|
|
77
|
+
return self.status == "healthy"
|
|
78
|
+
|
|
79
|
+
def is_operational(self) -> bool:
|
|
80
|
+
"""Check if adapter is operational (healthy or degraded).
|
|
81
|
+
|
|
82
|
+
Returns
|
|
83
|
+
-------
|
|
84
|
+
bool
|
|
85
|
+
True if status is "healthy" or "degraded", False if "unhealthy"
|
|
86
|
+
"""
|
|
87
|
+
return self.status in ("healthy", "degraded")
|