hexdag 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Base protocol interface for making API calls.
|
|
2
|
+
|
|
3
|
+
This is a fundamental protocol that other ports can inherit from.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from abc import abstractmethod
|
|
7
|
+
from typing import Any, Protocol, runtime_checkable
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@runtime_checkable
|
|
11
|
+
class APICall(Protocol):
|
|
12
|
+
"""Base protocol for making external API calls.
|
|
13
|
+
|
|
14
|
+
This is a fundamental protocol that provides a standard interface for
|
|
15
|
+
making HTTP/REST API calls. Other protocols (like LLM) can inherit from
|
|
16
|
+
this to indicate they also support API call functionality.
|
|
17
|
+
|
|
18
|
+
Implementations can use requests, httpx, aiohttp, or any HTTP client.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
# Required
|
|
22
|
+
@abstractmethod
|
|
23
|
+
async def aget(
|
|
24
|
+
self,
|
|
25
|
+
url: str,
|
|
26
|
+
headers: dict[str, str] | None = None,
|
|
27
|
+
params: dict[str, Any] | None = None,
|
|
28
|
+
**kwargs: Any,
|
|
29
|
+
) -> dict[str, Any]:
|
|
30
|
+
"""Make an async GET request.
|
|
31
|
+
|
|
32
|
+
Args
|
|
33
|
+
----
|
|
34
|
+
url: The URL to make the request to
|
|
35
|
+
headers: Optional headers to include
|
|
36
|
+
params: Optional query parameters
|
|
37
|
+
**kwargs: Additional implementation-specific options
|
|
38
|
+
|
|
39
|
+
Returns
|
|
40
|
+
-------
|
|
41
|
+
Response data as a dictionary
|
|
42
|
+
"""
|
|
43
|
+
...
|
|
44
|
+
|
|
45
|
+
@abstractmethod
|
|
46
|
+
async def apost(
|
|
47
|
+
self,
|
|
48
|
+
url: str,
|
|
49
|
+
json: dict[str, Any] | None = None,
|
|
50
|
+
data: Any | None = None,
|
|
51
|
+
headers: dict[str, str] | None = None,
|
|
52
|
+
**kwargs: Any,
|
|
53
|
+
) -> dict[str, Any]:
|
|
54
|
+
"""Make an async POST request.
|
|
55
|
+
|
|
56
|
+
Args
|
|
57
|
+
----
|
|
58
|
+
url: The URL to make the request to
|
|
59
|
+
json: JSON data to send in the request body
|
|
60
|
+
data: Form data or raw data to send
|
|
61
|
+
headers: Optional headers to include
|
|
62
|
+
**kwargs: Additional implementation-specific options
|
|
63
|
+
|
|
64
|
+
Returns
|
|
65
|
+
-------
|
|
66
|
+
Response data as a dictionary
|
|
67
|
+
"""
|
|
68
|
+
...
|
|
69
|
+
|
|
70
|
+
# Optional methods for enhanced functionality
|
|
71
|
+
async def aput(
|
|
72
|
+
self,
|
|
73
|
+
url: str,
|
|
74
|
+
json: dict[str, Any] | None = None,
|
|
75
|
+
data: Any | None = None,
|
|
76
|
+
headers: dict[str, str] | None = None,
|
|
77
|
+
**kwargs: Any,
|
|
78
|
+
) -> dict[str, Any]:
|
|
79
|
+
"""Make an async PUT request.
|
|
80
|
+
|
|
81
|
+
Args
|
|
82
|
+
----
|
|
83
|
+
url: The URL to make the request to
|
|
84
|
+
json: JSON data to send in the request body
|
|
85
|
+
data: Form data or raw data to send
|
|
86
|
+
headers: Optional headers to include
|
|
87
|
+
**kwargs: Additional implementation-specific options
|
|
88
|
+
|
|
89
|
+
Returns
|
|
90
|
+
-------
|
|
91
|
+
Response data as a dictionary
|
|
92
|
+
"""
|
|
93
|
+
...
|
|
94
|
+
|
|
95
|
+
async def adelete(
|
|
96
|
+
self,
|
|
97
|
+
url: str,
|
|
98
|
+
headers: dict[str, str] | None = None,
|
|
99
|
+
**kwargs: Any,
|
|
100
|
+
) -> dict[str, Any]:
|
|
101
|
+
"""Make an async DELETE request.
|
|
102
|
+
|
|
103
|
+
Args
|
|
104
|
+
----
|
|
105
|
+
url: The URL to make the request to
|
|
106
|
+
headers: Optional headers to include
|
|
107
|
+
**kwargs: Additional implementation-specific options
|
|
108
|
+
|
|
109
|
+
Returns
|
|
110
|
+
-------
|
|
111
|
+
Response data as a dictionary
|
|
112
|
+
"""
|
|
113
|
+
...
|
|
114
|
+
|
|
115
|
+
async def arequest(
|
|
116
|
+
self,
|
|
117
|
+
method: str,
|
|
118
|
+
url: str,
|
|
119
|
+
**kwargs: Any,
|
|
120
|
+
) -> dict[str, Any]:
|
|
121
|
+
"""Make a generic async HTTP request.
|
|
122
|
+
|
|
123
|
+
Args
|
|
124
|
+
----
|
|
125
|
+
method: HTTP method (GET, POST, PUT, DELETE, etc.)
|
|
126
|
+
url: The URL to make the request to
|
|
127
|
+
**kwargs: Additional request options (headers, data, json, etc.)
|
|
128
|
+
|
|
129
|
+
Returns
|
|
130
|
+
-------
|
|
131
|
+
Response data as a dictionary
|
|
132
|
+
"""
|
|
133
|
+
...
|
|
@@ -0,0 +1,489 @@
|
|
|
1
|
+
"""Database port interface for accessing database schema information."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import abstractmethod
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from collections.abc import AsyncIterator
|
|
12
|
+
|
|
13
|
+
from hexdag.core.ports.healthcheck import HealthStatus
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ColumnType(Enum):
    """Enumeration of column data types.

    Each member's value is the lowercase form of its name.
    """

    # Textual and numeric kinds
    TEXT = "text"
    INT = "int"
    FLOAT = "float"
    # Truth values
    BOOLEAN = "boolean"
    # Calendar and clock kinds
    DATE = "date"
    TIMESTAMP = "timestamp"
    # Remaining kinds
    DECIMAL = "decimal"
    BLOB = "blob"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class ColumnSchema:
    """Describe a single column of a database table.

    Only ``name`` and ``type`` must be supplied. With the defaults the column
    is nullable, is not a primary key and has no foreign key.
    """

    # Column name.
    name: str
    # Either a ColumnType member or a database-specific type string.
    type: ColumnType | str
    # Nullable flag; True unless stated otherwise.
    nullable: bool = True
    # Primary-key flag; False unless stated otherwise.
    primary_key: bool = False
    # Foreign-key target written as "table.column"; None when there is none.
    foreign_key: str | None = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class TableSchema:
    """Describe a database table as a name plus its column schemas."""

    # Table name.
    name: str
    # One ColumnSchema entry per column.
    columns: list[ColumnSchema]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@runtime_checkable
class DatabasePort(Protocol):
    """Port through which database schema and metadata are reached.

    The port hides the concrete database system so the analytics engine can
    run against different backends. An implementation may connect directly
    (psycopg2, SQLAlchemy) or go through a REST API for a cloud database
    (Snowflake, BigQuery, etc.).

    Optional Methods
    ----------------
    Beyond the two abstract methods, adapters may provide:
        - ahealth_check(): Confirm connectivity and that queries execute
        - query(): Streaming query interface with filters
        - aget_relationships(): Foreign key relationships
        - aget_indexes(): Index information
        - aget_table_statistics(): Table statistics

    Note
    ----
    A runtime ``isinstance`` check against a ``runtime_checkable`` protocol
    looks for every method declared on it, so a class that does not inherit
    from ``DatabasePort`` only passes the check if it defines the optional
    methods declared below as well.
    """

    # --- Abstract core -----------------------------------------------------
    @abstractmethod
    async def aget_table_schemas(self) -> dict[str, dict[str, Any]]:
        """Return schema information for every table.

        Returns
        -------
        Mapping of table name to its schema description, shaped as::

            {
                "table_name": {
                    "table_name": str,
                    "columns": dict[str, str],  # column_name -> data_type
                    "primary_keys": list[str],
                    "foreign_keys": list[dict[str, str]]
                }
            }

        """
        ...

    @abstractmethod
    async def aexecute_query(
        self,
        query: str,
        params: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Run a SQL query and hand back its rows.

        Args
        ----
            query: SQL text to execute
            params: Values for a safely parameterized query, if any

        Returns
        -------
            One dictionary per result row
        """
        ...

    # --- Optional extras (non-abstract) ------------------------------------
    async def aget_relationships(self) -> list[dict[str, Any]]:
        """Return the foreign key relationships between tables.

        Returns
        -------
        One dictionary per relationship, shaped as::

            {
                "from_table": str,
                "from_column": str,
                "to_table": str,
                "to_column": str,
                "relationship_type": str  # "many_to_one", etc.
            }

        """
        ...

    async def aget_indexes(self) -> list[dict[str, Any]]:
        """Return index information, intended for performance optimization.

        Returns
        -------
        One dictionary per index, shaped as::

            {
                "index_name": str,
                "table_name": str,
                "columns": list[str],
                "index_type": str,  # "btree", "hash", etc.
                "is_unique": bool
            }

        """
        ...

    async def aget_table_statistics(self) -> dict[str, dict[str, Any]]:
        """Return per-table statistics, intended for query optimization.

        Returns
        -------
        Mapping of table name to its statistics, shaped as::

            {
                "table_name": {
                    "row_count": int,
                    "size_bytes": int,
                    "last_updated": str
                }
            }

        """
        ...

    async def ahealth_check(self) -> HealthStatus:
        """Report the adapter's health and connectivity (optional).

        An adapter is expected to look at:
            - Whether the database connection is up
            - The state of the connection pool
            - Whether a trivial query runs (e.g., SELECT 1)
            - Whether authentication is still valid

        Implementing this method is optional. An adapter without it is
        considered healthy by default.

        Returns
        -------
        HealthStatus
            Current health status with details about database connectivity

        Examples
        --------
        Example usage::

            # Health check on a PostgreSQL adapter
            status = await postgres_adapter.ahealth_check()
            status.status  # "healthy", "degraded", or "unhealthy"
            status.latency_ms  # Time taken for health check query
            status.details  # {"pool_size": 10, "active_connections": 3}
        """
        ...
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@runtime_checkable
|
|
190
|
+
class SupportsStreamingQuery(Protocol):
|
|
191
|
+
"""Optional protocol for adapters that support streaming queries.
|
|
192
|
+
|
|
193
|
+
This protocol enables efficient handling of large result sets by streaming
|
|
194
|
+
rows one at a time rather than loading all results into memory.
|
|
195
|
+
"""
|
|
196
|
+
|
|
197
|
+
def query(
|
|
198
|
+
self,
|
|
199
|
+
table: str,
|
|
200
|
+
filters: dict[str, Any] | None = None,
|
|
201
|
+
columns: list[str] | None = None,
|
|
202
|
+
limit: int | None = None,
|
|
203
|
+
) -> AsyncIterator[dict[str, Any]]:
|
|
204
|
+
"""Query rows from a table with optional filtering and column selection.
|
|
205
|
+
|
|
206
|
+
Args
|
|
207
|
+
----
|
|
208
|
+
table: Name of the table to query
|
|
209
|
+
filters: Optional column-value pairs to filter by
|
|
210
|
+
columns: Optional list of columns to return (None = all)
|
|
211
|
+
limit: Optional maximum number of rows to return
|
|
212
|
+
|
|
213
|
+
Returns
|
|
214
|
+
-------
|
|
215
|
+
AsyncIterator[dict]: Stream of rows as dictionaries
|
|
216
|
+
|
|
217
|
+
Raises
|
|
218
|
+
------
|
|
219
|
+
ValueError
|
|
220
|
+
If table doesn't exist or filters/columns are invalid
|
|
221
|
+
"""
|
|
222
|
+
...
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@runtime_checkable
|
|
226
|
+
class SupportsRawSQL(Protocol):
|
|
227
|
+
"""Optional protocol for adapters that support raw SQL queries with streaming."""
|
|
228
|
+
|
|
229
|
+
async def query_raw(
|
|
230
|
+
self, sql: str, params: dict[str, Any] | None = None
|
|
231
|
+
) -> AsyncIterator[dict[str, Any]]:
|
|
232
|
+
"""Execute a raw SQL query with streaming results.
|
|
233
|
+
|
|
234
|
+
Args
|
|
235
|
+
----
|
|
236
|
+
sql: SQL query string
|
|
237
|
+
params: Optional query parameters
|
|
238
|
+
|
|
239
|
+
Returns
|
|
240
|
+
-------
|
|
241
|
+
AsyncIterator[dict]: Stream of result rows
|
|
242
|
+
|
|
243
|
+
Raises
|
|
244
|
+
------
|
|
245
|
+
ValueError
|
|
246
|
+
If SQL is invalid
|
|
247
|
+
"""
|
|
248
|
+
...
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
@runtime_checkable
class SupportsTableSchema(Protocol):
    """Capability protocol for adapters that can describe a table's schema."""

    async def get_table_schema(self, table: str) -> TableSchema:
        """Return the schema of one table.

        Args
        ----
            table: Table whose schema is requested
        """
        ...
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@runtime_checkable
class SupportsIndexes(Protocol):
    """Capability protocol for adapters that can report index information."""

    async def get_indexes(self, table: str) -> list[dict[str, Any]]:
        """Return index information for one table.

        Args
        ----
            table: Table whose index information is requested
        """
        ...
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@runtime_checkable
class SupportsStatistics(Protocol):
    """Capability protocol for adapters that can report table statistics."""

    async def get_table_statistics(self, table: str) -> dict[str, Any]:
        """Return statistics for one table.

        Args
        ----
            table: Table whose statistics are requested
        """
        ...
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
@runtime_checkable
class SupportsReadOnly(Protocol):
    """Capability protocol marking an adapter as read-only.

    An adapter carrying this marker serves read operations (SELECT queries)
    only and raises errors when a write is attempted. Typical candidates are
    analytics databases, reporting systems and production replicas.

    Such adapters should raise an appropriate error whenever a write
    operation is attempted through aexecute_query().

    Examples
    --------
    Detect a read-only adapter::

        if isinstance(db_adapter, SupportsReadOnly):
            print("This adapter is read-only")
            # Only use SELECT queries
    """

    @abstractmethod
    async def is_read_only(self) -> bool:
        """Tell whether the adapter is currently in read-only mode.

        This is a coroutine function, so the call must be awaited.

        Returns
        -------
            True if adapter is read-only, False otherwise
        """
        ...
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
@runtime_checkable
|
|
325
|
+
class SupportsVectorSearch(Protocol):
|
|
326
|
+
"""Optional protocol for adapters that support vector similarity search.
|
|
327
|
+
|
|
328
|
+
This protocol enables semantic search capabilities using vector embeddings,
|
|
329
|
+
commonly used with vector databases like Pinecone, Weaviate, pgvector, etc.
|
|
330
|
+
"""
|
|
331
|
+
|
|
332
|
+
@abstractmethod
|
|
333
|
+
async def avector_search(
|
|
334
|
+
self,
|
|
335
|
+
collection: str,
|
|
336
|
+
query_vector: list[float],
|
|
337
|
+
top_k: int = 10,
|
|
338
|
+
filters: dict[str, Any] | None = None,
|
|
339
|
+
include_metadata: bool = True,
|
|
340
|
+
include_vectors: bool = False,
|
|
341
|
+
) -> list[dict[str, Any]]:
|
|
342
|
+
"""Perform vector similarity search.
|
|
343
|
+
|
|
344
|
+
Args
|
|
345
|
+
----
|
|
346
|
+
collection: Name of the vector collection/table to search
|
|
347
|
+
query_vector: Query embedding vector (must match collection dimension)
|
|
348
|
+
top_k: Number of nearest neighbors to return (default: 10)
|
|
349
|
+
filters: Optional metadata filters to apply
|
|
350
|
+
include_metadata: Whether to include metadata in results (default: True)
|
|
351
|
+
include_vectors: Whether to include vectors in results (default: False)
|
|
352
|
+
|
|
353
|
+
Returns
|
|
354
|
+
-------
|
|
355
|
+
List of search results with structure::
|
|
356
|
+
|
|
357
|
+
[
|
|
358
|
+
{
|
|
359
|
+
"id": str, # Document/vector ID
|
|
360
|
+
"score": float, # Similarity score (0-1 or distance)
|
|
361
|
+
"metadata": dict, # Document metadata (if include_metadata=True)
|
|
362
|
+
"vector": list[float] # Vector embedding (if include_vectors=True)
|
|
363
|
+
}
|
|
364
|
+
]
|
|
365
|
+
|
|
366
|
+
Raises
|
|
367
|
+
------
|
|
368
|
+
ValueError
|
|
369
|
+
If collection doesn't exist or query_vector dimension mismatch
|
|
370
|
+
|
|
371
|
+
Examples
|
|
372
|
+
--------
|
|
373
|
+
Basic semantic search::
|
|
374
|
+
|
|
375
|
+
results = await db.avector_search(
|
|
376
|
+
collection="documents",
|
|
377
|
+
query_vector=[0.1, 0.2, ...], # 768-dim embedding
|
|
378
|
+
top_k=5
|
|
379
|
+
)
|
|
380
|
+
|
|
381
|
+
Search with metadata filtering::
|
|
382
|
+
|
|
383
|
+
results = await db.avector_search(
|
|
384
|
+
collection="products",
|
|
385
|
+
query_vector=embedding,
|
|
386
|
+
top_k=10,
|
|
387
|
+
filters={"category": "electronics", "price_range": "100-500"}
|
|
388
|
+
)
|
|
389
|
+
"""
|
|
390
|
+
...
|
|
391
|
+
|
|
392
|
+
@abstractmethod
|
|
393
|
+
async def avector_upsert(
|
|
394
|
+
self,
|
|
395
|
+
collection: str,
|
|
396
|
+
vectors: list[dict[str, Any]],
|
|
397
|
+
) -> dict[str, Any]:
|
|
398
|
+
"""Insert or update vectors in a collection.
|
|
399
|
+
|
|
400
|
+
Args
|
|
401
|
+
----
|
|
402
|
+
collection: Name of the vector collection/table
|
|
403
|
+
vectors: List of vectors to upsert with structure::
|
|
404
|
+
|
|
405
|
+
[
|
|
406
|
+
{
|
|
407
|
+
"id": str, # Unique document ID
|
|
408
|
+
"vector": list[float], # Embedding vector
|
|
409
|
+
"metadata": dict # Optional metadata
|
|
410
|
+
}
|
|
411
|
+
]
|
|
412
|
+
|
|
413
|
+
Returns
|
|
414
|
+
-------
|
|
415
|
+
Dictionary with upsert statistics::
|
|
416
|
+
|
|
417
|
+
{
|
|
418
|
+
"upserted_count": int,
|
|
419
|
+
"updated_count": int,
|
|
420
|
+
"failed_count": int
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
Raises
|
|
424
|
+
------
|
|
425
|
+
ValueError
|
|
426
|
+
If collection doesn't exist or vector dimensions mismatch
|
|
427
|
+
|
|
428
|
+
Examples
|
|
429
|
+
--------
|
|
430
|
+
Insert document embeddings::
|
|
431
|
+
|
|
432
|
+
await db.avector_upsert(
|
|
433
|
+
collection="documents",
|
|
434
|
+
vectors=[
|
|
435
|
+
{
|
|
436
|
+
"id": "doc1",
|
|
437
|
+
"vector": [0.1, 0.2, ...],
|
|
438
|
+
"metadata": {"title": "Document 1", "author": "John"}
|
|
439
|
+
}
|
|
440
|
+
]
|
|
441
|
+
)
|
|
442
|
+
"""
|
|
443
|
+
...
|
|
444
|
+
|
|
445
|
+
@abstractmethod
|
|
446
|
+
async def avector_delete(
|
|
447
|
+
self,
|
|
448
|
+
collection: str,
|
|
449
|
+
ids: list[str] | None = None,
|
|
450
|
+
filters: dict[str, Any] | None = None,
|
|
451
|
+
) -> dict[str, Any]:
|
|
452
|
+
"""Delete vectors from a collection.
|
|
453
|
+
|
|
454
|
+
Args
|
|
455
|
+
----
|
|
456
|
+
collection: Name of the vector collection/table
|
|
457
|
+
ids: Optional list of document IDs to delete
|
|
458
|
+
filters: Optional metadata filters for bulk deletion
|
|
459
|
+
|
|
460
|
+
Returns
|
|
461
|
+
-------
|
|
462
|
+
Dictionary with deletion statistics::
|
|
463
|
+
|
|
464
|
+
{
|
|
465
|
+
"deleted_count": int
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
Raises
|
|
469
|
+
------
|
|
470
|
+
ValueError
|
|
471
|
+
If neither ids nor filters are provided
|
|
472
|
+
|
|
473
|
+
Examples
|
|
474
|
+
--------
|
|
475
|
+
Delete by IDs::
|
|
476
|
+
|
|
477
|
+
await db.avector_delete(
|
|
478
|
+
collection="documents",
|
|
479
|
+
ids=["doc1", "doc2"]
|
|
480
|
+
)
|
|
481
|
+
|
|
482
|
+
Delete by metadata filter::
|
|
483
|
+
|
|
484
|
+
await db.avector_delete(
|
|
485
|
+
collection="documents",
|
|
486
|
+
filters={"category": "archived"}
|
|
487
|
+
)
|
|
488
|
+
"""
|
|
489
|
+
...
|