hexdag 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hexdag/__init__.py +116 -0
- hexdag/__main__.py +30 -0
- hexdag/adapters/executors/__init__.py +5 -0
- hexdag/adapters/executors/local_executor.py +316 -0
- hexdag/builtin/__init__.py +6 -0
- hexdag/builtin/adapters/__init__.py +51 -0
- hexdag/builtin/adapters/anthropic/__init__.py +5 -0
- hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
- hexdag/builtin/adapters/database/__init__.py +6 -0
- hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
- hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
- hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
- hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
- hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
- hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
- hexdag/builtin/adapters/local/README.md +59 -0
- hexdag/builtin/adapters/local/__init__.py +7 -0
- hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
- hexdag/builtin/adapters/memory/__init__.py +47 -0
- hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
- hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
- hexdag/builtin/adapters/memory/schemas.py +57 -0
- hexdag/builtin/adapters/memory/session_memory.py +178 -0
- hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
- hexdag/builtin/adapters/memory/state_memory.py +280 -0
- hexdag/builtin/adapters/mock/README.md +89 -0
- hexdag/builtin/adapters/mock/__init__.py +15 -0
- hexdag/builtin/adapters/mock/hexdag.toml +50 -0
- hexdag/builtin/adapters/mock/mock_database.py +225 -0
- hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
- hexdag/builtin/adapters/mock/mock_llm.py +177 -0
- hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
- hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
- hexdag/builtin/adapters/openai/__init__.py +5 -0
- hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
- hexdag/builtin/adapters/secret/__init__.py +7 -0
- hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
- hexdag/builtin/adapters/unified_tool_router.py +280 -0
- hexdag/builtin/macros/__init__.py +17 -0
- hexdag/builtin/macros/conversation_agent.py +390 -0
- hexdag/builtin/macros/llm_macro.py +151 -0
- hexdag/builtin/macros/reasoning_agent.py +423 -0
- hexdag/builtin/macros/tool_macro.py +380 -0
- hexdag/builtin/nodes/__init__.py +38 -0
- hexdag/builtin/nodes/_discovery.py +123 -0
- hexdag/builtin/nodes/agent_node.py +696 -0
- hexdag/builtin/nodes/base_node_factory.py +242 -0
- hexdag/builtin/nodes/composite_node.py +926 -0
- hexdag/builtin/nodes/data_node.py +201 -0
- hexdag/builtin/nodes/expression_node.py +487 -0
- hexdag/builtin/nodes/function_node.py +454 -0
- hexdag/builtin/nodes/llm_node.py +491 -0
- hexdag/builtin/nodes/loop_node.py +920 -0
- hexdag/builtin/nodes/mapped_input.py +518 -0
- hexdag/builtin/nodes/port_call_node.py +269 -0
- hexdag/builtin/nodes/tool_call_node.py +195 -0
- hexdag/builtin/nodes/tool_utils.py +390 -0
- hexdag/builtin/prompts/__init__.py +68 -0
- hexdag/builtin/prompts/base.py +422 -0
- hexdag/builtin/prompts/chat_prompts.py +303 -0
- hexdag/builtin/prompts/error_correction_prompts.py +320 -0
- hexdag/builtin/prompts/tool_prompts.py +160 -0
- hexdag/builtin/tools/builtin_tools.py +84 -0
- hexdag/builtin/tools/database_tools.py +164 -0
- hexdag/cli/__init__.py +17 -0
- hexdag/cli/__main__.py +7 -0
- hexdag/cli/commands/__init__.py +27 -0
- hexdag/cli/commands/build_cmd.py +812 -0
- hexdag/cli/commands/create_cmd.py +208 -0
- hexdag/cli/commands/docs_cmd.py +293 -0
- hexdag/cli/commands/generate_types_cmd.py +252 -0
- hexdag/cli/commands/init_cmd.py +188 -0
- hexdag/cli/commands/pipeline_cmd.py +494 -0
- hexdag/cli/commands/plugin_dev_cmd.py +529 -0
- hexdag/cli/commands/plugins_cmd.py +441 -0
- hexdag/cli/commands/studio_cmd.py +101 -0
- hexdag/cli/commands/validate_cmd.py +221 -0
- hexdag/cli/main.py +84 -0
- hexdag/core/__init__.py +83 -0
- hexdag/core/config/__init__.py +20 -0
- hexdag/core/config/loader.py +479 -0
- hexdag/core/config/models.py +150 -0
- hexdag/core/configurable.py +294 -0
- hexdag/core/context/__init__.py +37 -0
- hexdag/core/context/execution_context.py +378 -0
- hexdag/core/docs/__init__.py +26 -0
- hexdag/core/docs/extractors.py +678 -0
- hexdag/core/docs/generators.py +890 -0
- hexdag/core/docs/models.py +120 -0
- hexdag/core/domain/__init__.py +10 -0
- hexdag/core/domain/dag.py +1225 -0
- hexdag/core/exceptions.py +234 -0
- hexdag/core/expression_parser.py +569 -0
- hexdag/core/logging.py +449 -0
- hexdag/core/models/__init__.py +17 -0
- hexdag/core/models/base.py +138 -0
- hexdag/core/orchestration/__init__.py +46 -0
- hexdag/core/orchestration/body_executor.py +481 -0
- hexdag/core/orchestration/components/__init__.py +97 -0
- hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
- hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
- hexdag/core/orchestration/components/execution_coordinator.py +360 -0
- hexdag/core/orchestration/components/health_check_manager.py +176 -0
- hexdag/core/orchestration/components/input_mapper.py +143 -0
- hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
- hexdag/core/orchestration/components/node_executor.py +377 -0
- hexdag/core/orchestration/components/secret_manager.py +202 -0
- hexdag/core/orchestration/components/wave_executor.py +158 -0
- hexdag/core/orchestration/constants.py +17 -0
- hexdag/core/orchestration/events/README.md +312 -0
- hexdag/core/orchestration/events/__init__.py +104 -0
- hexdag/core/orchestration/events/batching.py +330 -0
- hexdag/core/orchestration/events/decorators.py +139 -0
- hexdag/core/orchestration/events/events.py +573 -0
- hexdag/core/orchestration/events/observers/__init__.py +30 -0
- hexdag/core/orchestration/events/observers/core_observers.py +690 -0
- hexdag/core/orchestration/events/observers/models.py +111 -0
- hexdag/core/orchestration/events/taxonomy.py +269 -0
- hexdag/core/orchestration/hook_context.py +237 -0
- hexdag/core/orchestration/hooks.py +437 -0
- hexdag/core/orchestration/models.py +418 -0
- hexdag/core/orchestration/orchestrator.py +910 -0
- hexdag/core/orchestration/orchestrator_factory.py +275 -0
- hexdag/core/orchestration/port_wrappers.py +327 -0
- hexdag/core/orchestration/prompt/__init__.py +32 -0
- hexdag/core/orchestration/prompt/template.py +332 -0
- hexdag/core/pipeline_builder/__init__.py +21 -0
- hexdag/core/pipeline_builder/component_instantiator.py +386 -0
- hexdag/core/pipeline_builder/include_tag.py +265 -0
- hexdag/core/pipeline_builder/pipeline_config.py +133 -0
- hexdag/core/pipeline_builder/py_tag.py +223 -0
- hexdag/core/pipeline_builder/tag_discovery.py +268 -0
- hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
- hexdag/core/pipeline_builder/yaml_validator.py +569 -0
- hexdag/core/ports/__init__.py +65 -0
- hexdag/core/ports/api_call.py +133 -0
- hexdag/core/ports/database.py +489 -0
- hexdag/core/ports/embedding.py +215 -0
- hexdag/core/ports/executor.py +237 -0
- hexdag/core/ports/file_storage.py +117 -0
- hexdag/core/ports/healthcheck.py +87 -0
- hexdag/core/ports/llm.py +551 -0
- hexdag/core/ports/memory.py +70 -0
- hexdag/core/ports/observer_manager.py +130 -0
- hexdag/core/ports/secret.py +145 -0
- hexdag/core/ports/tool_router.py +94 -0
- hexdag/core/ports_builder.py +623 -0
- hexdag/core/protocols.py +273 -0
- hexdag/core/resolver.py +304 -0
- hexdag/core/schema/__init__.py +9 -0
- hexdag/core/schema/generator.py +742 -0
- hexdag/core/secrets.py +242 -0
- hexdag/core/types.py +413 -0
- hexdag/core/utils/async_warnings.py +206 -0
- hexdag/core/utils/schema_conversion.py +78 -0
- hexdag/core/utils/sql_validation.py +86 -0
- hexdag/core/validation/secure_json.py +148 -0
- hexdag/core/yaml_macro.py +517 -0
- hexdag/mcp_server.py +3120 -0
- hexdag/studio/__init__.py +10 -0
- hexdag/studio/build_ui.py +92 -0
- hexdag/studio/server/__init__.py +1 -0
- hexdag/studio/server/main.py +100 -0
- hexdag/studio/server/routes/__init__.py +9 -0
- hexdag/studio/server/routes/execute.py +208 -0
- hexdag/studio/server/routes/export.py +558 -0
- hexdag/studio/server/routes/files.py +207 -0
- hexdag/studio/server/routes/plugins.py +419 -0
- hexdag/studio/server/routes/validate.py +220 -0
- hexdag/studio/ui/index.html +13 -0
- hexdag/studio/ui/package-lock.json +2992 -0
- hexdag/studio/ui/package.json +31 -0
- hexdag/studio/ui/postcss.config.js +6 -0
- hexdag/studio/ui/public/hexdag.svg +5 -0
- hexdag/studio/ui/src/App.tsx +251 -0
- hexdag/studio/ui/src/components/Canvas.tsx +408 -0
- hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
- hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
- hexdag/studio/ui/src/components/Header.tsx +181 -0
- hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
- hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
- hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
- hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
- hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
- hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
- hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
- hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
- hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
- hexdag/studio/ui/src/components/index.ts +8 -0
- hexdag/studio/ui/src/index.css +92 -0
- hexdag/studio/ui/src/main.tsx +10 -0
- hexdag/studio/ui/src/types/index.ts +123 -0
- hexdag/studio/ui/src/vite-env.d.ts +1 -0
- hexdag/studio/ui/tailwind.config.js +29 -0
- hexdag/studio/ui/tsconfig.json +37 -0
- hexdag/studio/ui/tsconfig.node.json +13 -0
- hexdag/studio/ui/vite.config.ts +35 -0
- hexdag/visualization/__init__.py +69 -0
- hexdag/visualization/dag_visualizer.py +1020 -0
- hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
- hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
- hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
- hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
- hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
- hexdag_plugins/.gitignore +43 -0
- hexdag_plugins/README.md +73 -0
- hexdag_plugins/__init__.py +1 -0
- hexdag_plugins/azure/LICENSE +21 -0
- hexdag_plugins/azure/README.md +414 -0
- hexdag_plugins/azure/__init__.py +21 -0
- hexdag_plugins/azure/azure_blob_adapter.py +450 -0
- hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
- hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
- hexdag_plugins/azure/azure_openai_adapter.py +415 -0
- hexdag_plugins/azure/pyproject.toml +107 -0
- hexdag_plugins/azure/tests/__init__.py +1 -0
- hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
- hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
- hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
- hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
- hexdag_plugins/hexdag_etl/README.md +168 -0
- hexdag_plugins/hexdag_etl/__init__.py +53 -0
- hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
- hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
- hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
- hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
- hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
- hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
- hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
- hexdag_plugins/hexdag_etl/test_transform.py +54 -0
- hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
- hexdag_plugins/mysql_adapter/LICENSE +21 -0
- hexdag_plugins/mysql_adapter/README.md +224 -0
- hexdag_plugins/mysql_adapter/__init__.py +6 -0
- hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
- hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
- hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
- hexdag_plugins/storage/README.md +184 -0
- hexdag_plugins/storage/__init__.py +19 -0
- hexdag_plugins/storage/file/__init__.py +5 -0
- hexdag_plugins/storage/file/local.py +325 -0
- hexdag_plugins/storage/ports/__init__.py +5 -0
- hexdag_plugins/storage/ports/vector_store.py +236 -0
- hexdag_plugins/storage/sql/__init__.py +7 -0
- hexdag_plugins/storage/sql/base.py +187 -0
- hexdag_plugins/storage/sql/mysql.py +27 -0
- hexdag_plugins/storage/sql/postgresql.py +27 -0
- hexdag_plugins/storage/tests/__init__.py +1 -0
- hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
- hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
- hexdag_plugins/storage/vector/__init__.py +7 -0
- hexdag_plugins/storage/vector/chromadb.py +223 -0
- hexdag_plugins/storage/vector/in_memory.py +285 -0
- hexdag_plugins/storage/vector/pgvector.py +502 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Runtime warnings for synchronous I/O operations in async contexts.
|
|
2
|
+
|
|
3
|
+
This module provides utilities to detect and warn about blocking I/O operations
|
|
4
|
+
at runtime when executing within async functions or coroutines.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import functools
|
|
11
|
+
import inspect
|
|
12
|
+
import warnings
|
|
13
|
+
from typing import TYPE_CHECKING, Any, TypeVar
|
|
14
|
+
|
|
15
|
+
from hexdag.core.logging import get_logger
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Callable
|
|
19
|
+
|
|
20
|
+
logger = get_logger(__name__)
|
|
21
|
+
|
|
22
|
+
T = TypeVar("T")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AsyncIOWarning(UserWarning):
    """Warning category used when blocking I/O is detected while an event loop runs."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _is_in_async_context() -> bool:
    """Report whether the current thread has a running event loop.

    Returns
    -------
        True when ``asyncio.get_running_loop()`` succeeds, False when it raises
        ``RuntimeError`` (no loop is running in this thread)
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() raises RuntimeError when no loop is running.
        return False
    return True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def warn_sync_io(operation: str, suggestion: str | None = None) -> None:
    """Emit a warning about synchronous I/O in async context.

    Does nothing when no event loop is running. Otherwise the message is issued
    through ``warnings.warn`` with category ``AsyncIOWarning`` and then mirrored
    to the module logger.

    The location named in the message (and the ``stacklevel`` handed to
    ``warnings.warn``) is the nearest caller that lives outside this module, so
    wrappers defined here (for example ``warn_file_open``) report the code that
    called them instead of reporting themselves.

    Args
    ----
        operation: Description of the blocking operation
        suggestion: Optional suggestion for async alternative
    """
    if not _is_in_async_context():
        return

    message = f"Blocking I/O operation '{operation}' detected in async context"
    stacklevel = 2  # 1 would be this function itself; 2 is whoever called it

    # currentframe() may return None on interpreters without frame support;
    # in that case the location is simply left out of the message.
    frame = inspect.currentframe()
    caller = frame.f_back if frame is not None else None
    try:
        if frame is not None and caller is not None:
            own_file = frame.f_code.co_filename
            # Walk past helper frames that belong to this module.
            while caller.f_back is not None and caller.f_code.co_filename == own_file:
                caller = caller.f_back
                stacklevel += 1
            message += (
                f" (function: {caller.f_code.co_name}, "
                f"{caller.f_code.co_filename}:{caller.f_lineno})"
            )
    finally:
        # Drop frame references promptly to avoid reference cycles.
        del frame, caller

    if suggestion:
        message += f". {suggestion}"

    warnings.warn(message, AsyncIOWarning, stacklevel=stacklevel)
    logger.warning(message)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def warn_if_async[T](func: Callable[..., T]) -> Callable[..., T]:
|
|
76
|
+
"""Warn if a synchronous function is called in async context.
|
|
77
|
+
|
|
78
|
+
Args
|
|
79
|
+
----
|
|
80
|
+
func: Function to wrap
|
|
81
|
+
|
|
82
|
+
Returns
|
|
83
|
+
-------
|
|
84
|
+
Wrapped function that warns if called in async context
|
|
85
|
+
|
|
86
|
+
Example
|
|
87
|
+
-------
|
|
88
|
+
>>> @warn_if_async
|
|
89
|
+
... def sync_database_query(sql: str) -> list:
|
|
90
|
+
... return connection.execute(sql).fetchall()
|
|
91
|
+
"""
|
|
92
|
+
|
|
93
|
+
@functools.wraps(func)
|
|
94
|
+
def wrapper(*args: Any, **kwargs: Any) -> T:
|
|
95
|
+
if _is_in_async_context():
|
|
96
|
+
warn_sync_io(
|
|
97
|
+
f"{func.__name__}()",
|
|
98
|
+
f"Consider using an async version of {func.__name__}",
|
|
99
|
+
)
|
|
100
|
+
return func(*args, **kwargs)
|
|
101
|
+
|
|
102
|
+
return wrapper
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class SyncIOMonitor:
    """Context manager for flagging blocking operations inside a code section.

    On entry the monitor records whether an event loop is running. Code inside
    the ``with`` block then calls ``check_operation`` for each operation it
    wants flagged; a warning is emitted only if a loop was running at entry.

    Example
    -------
        >>> async def process_data():
        ...     with SyncIOMonitor("data processing") as monitor:
        ...         monitor.check_operation("process_file()")
        ...         data = process_file()
    """

    def __init__(self, context_name: str = "code block") -> None:
        """Create a monitor.

        Args
        ----
            context_name: Name of the context being monitored
        """
        self.context_name = context_name
        # Refreshed in __enter__; False until the context is entered.
        self.is_async = False

    def __enter__(self) -> SyncIOMonitor:
        """Record whether an event loop is running and return the monitor."""
        self.is_async = _is_in_async_context()
        return self

    def __exit__(
        self,
        _exc_type: Any,  # noqa: ARG002
        _exc_val: Any,  # noqa: ARG002
        _exc_tb: Any,  # noqa: ARG002
    ) -> None:
        """Leave the context; exceptions raised inside the block propagate."""
        return None

    def check_operation(self, operation: str, suggestion: str | None = None) -> None:
        """Warn about an operation if the context was entered under an event loop.

        Args
        ----
            operation: Description of the operation
            suggestion: Optional suggestion for async alternative
        """
        if not self.is_async:
            return
        warn_sync_io(operation, suggestion)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# Commonly monitored operations
|
|
156
|
+
def warn_file_open(path: str) -> None:
    """Flag a synchronous ``open()`` call made while an event loop is running.

    Args
    ----
        path: File path being opened
    """
    operation = f"open('{path}')"
    warn_sync_io(operation, "Use aiofiles.open() for async file I/O")
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def warn_sqlite_connect(db_path: str) -> None:
    """Flag a synchronous ``sqlite3.connect()`` call made while an event loop is running.

    Args
    ----
        db_path: Database path
    """
    operation = f"sqlite3.connect('{db_path}')"
    warn_sync_io(operation, "Use aiosqlite.connect() for async database operations")
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def warn_requests_call(method: str, url: str) -> None:
    """Flag a synchronous ``requests`` HTTP call made while an event loop is running.

    Args
    ----
        method: HTTP method (GET, POST, etc.); lower-cased for the message
        url: Request URL
    """
    verb = method.lower()
    warn_sync_io(
        operation=f"requests.{verb}('{url}')",
        suggestion="Use aiohttp.ClientSession for async HTTP requests",
    )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def warn_time_sleep(seconds: float) -> None:
    """Flag a synchronous ``time.sleep()`` call made while an event loop is running.

    Args
    ----
        seconds: Sleep duration
    """
    warn_sync_io(
        operation=f"time.sleep({seconds})",
        suggestion="Use await asyncio.sleep() in async functions",
    )
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Shared utilities for converting YAML schema strings to Python types.
|
|
2
|
+
|
|
3
|
+
This module provides utilities used by both YAML pipeline builders and node factories
|
|
4
|
+
to convert YAML-friendly type representations to actual Python types.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from functools import singledispatch
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
# Supported type name mappings
|
|
11
|
+
VALID_TYPE_NAMES: dict[str, Any] = {
    # Scalar types
    "str": str,
    "int": int,
    "float": float,
    "bool": bool,
    # Container types
    "list": list,
    "dict": dict,
    # Unconstrained
    "Any": Any,
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@singledispatch
def normalize_schema(schema: Any) -> Any:
    """Normalize a schema so that it refers to Python types rather than type names.

    This is the ``singledispatch`` entry point. The fallback implementation
    below returns its argument unchanged; input types that need conversion are
    handled by implementations registered on this function.

    Args:
        schema: Schema in various formats (dict, type, Pydantic model, etc.)

    Returns:
        Normalized schema with actual Python type objects

    Examples:
        >>> normalize_schema({"name": "str"})  # YAML format
        {'name': <class 'str'>}

        >>> normalize_schema({"name": str})  # Already normalized
        {'name': <class 'str'>}

        >>> normalize_schema(str)  # Pass-through for types
        <class 'str'>
    """
    # Fallback for anything without a registered handler: hand it back untouched.
    return schema
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@normalize_schema.register(dict)
def _(schema: dict) -> dict[str, type]:
    """Resolve a dict schema: type-name strings become types, nested dicts recurse.

    Raises:
        ValueError: If a string value is not a key of ``VALID_TYPE_NAMES``, or a
            value is neither a string, a dict, nor a type object.
    """
    resolved: dict[str, Any] = {}

    for field_name, spec in schema.items():
        if isinstance(spec, str):
            # Type given by name: look it up in the supported-name table.
            if spec not in VALID_TYPE_NAMES:
                valid_names = ", ".join(sorted(VALID_TYPE_NAMES))
                raise ValueError(
                    f"Invalid type '{spec}' for field '{field_name}'. "
                    f"Supported types: {valid_names}"
                )
            resolved[field_name] = VALID_TYPE_NAMES[spec]
            continue

        if isinstance(spec, dict):
            # Nested schema: normalize it through the dispatcher.
            resolved[field_name] = normalize_schema(spec)
            continue

        if isinstance(spec, type):
            # Already a type object: keep as-is.
            resolved[field_name] = spec
            continue

        raise ValueError(
            f"Field '{field_name}' has invalid value: {spec!r}. "
            f"Expected type name string, type object, or nested schema dict."
        )

    return resolved
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Backward compatibility alias
|
|
78
|
+
convert_yaml_schema = normalize_schema
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""SQL validation utilities for preventing injection attacks."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from hexdag.core.logging import get_logger
|
|
6
|
+
|
|
7
|
+
logger = get_logger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def validate_sql_identifier(
    identifier: str,
    identifier_type: str = "identifier",
    raise_on_invalid: bool = False,
) -> bool:
    """Validate SQL identifier to prevent injection attacks.

    SQL identifiers (table names, column names, etc.) cannot be passed as bound
    parameters, so they must be checked before being interpolated into a
    statement. This function accepts only a conservative ASCII character set.

    Valid identifiers:
    - Start with a letter (a-z, A-Z) or underscore (_)
    - Contain only letters, numbers, and underscores
    - Examples: "users", "user_data", "Table123", "_private"

    Invalid identifiers:
    - Start with numbers: "123table"
    - Contain special characters: "user-data", "user.table", "user name"
    - Contain any whitespace, including a trailing newline
    - Are empty, or are not strings at all

    Note that reserved words are NOT rejected: "SELECT" or "DROP" match the
    character rules and are reported as valid. Quote identifiers at the call
    site if reserved words must be supported.

    Parameters
    ----------
    identifier : str
        The SQL identifier to validate (e.g., table name, column name)
    identifier_type : str, optional
        Human-readable type name for error messages (default: "identifier")
        Examples: "table", "column", "database"
    raise_on_invalid : bool, optional
        If True, raises ValueError on invalid identifier
        If False, logs warning and returns False (default: False)

    Returns
    -------
    bool
        True if identifier is valid, False otherwise

    Raises
    ------
    ValueError
        If raise_on_invalid=True and identifier is invalid

    Examples
    --------
    >>> validate_sql_identifier("users")
    True

    >>> validate_sql_identifier("user_data")
    True

    >>> validate_sql_identifier("123invalid")
    False

    >>> validate_sql_identifier("user-data")
    False

    >>> validate_sql_identifier("user.table", "table", raise_on_invalid=True)  # doctest: +SKIP
    Traceback (most recent call last):
        ...
    ValueError: Invalid table 'user.table'. Must start with letter/underscore...
    """
    # fullmatch rather than match with a "$" anchor: "$" also matches just before
    # a trailing newline, which would let an identifier ending in a newline pass.
    is_valid = (
        isinstance(identifier, str)
        and re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", identifier) is not None
    )

    if not is_valid:
        # repr() renders ordinary names exactly as before ('name') while escaping
        # control characters, so hostile input cannot inject lines into the log.
        msg = (
            f"Invalid {identifier_type} {identifier!r}. "
            "Must start with letter/underscore and contain only "
            "letters, numbers, and underscores."
        )

        if raise_on_invalid:
            raise ValueError(msg)

        logger.warning(msg)

    return is_valid
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Literal
|
|
6
|
+
|
|
7
|
+
import orjson
|
|
8
|
+
|
|
9
|
+
# Closed set of failure codes carried by SafeJSONResult.error.
ErrorCode = Literal["too_large", "too_deep", "invalid_syntax", "unrecoverable", "no_json_found"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class SafeJSONResult:
    """Outcome of a SafeJSON parse: either parsed ``data`` or an ``error`` code.

    A result counts as successful exactly when ``error`` is None; ``data`` may
    itself be None on success (for example when the document is ``null``).
    """

    # Parsed value on success.
    data: Any | None = None
    # Failure code; None means the parse succeeded.
    error: ErrorCode | None = None
    # Human-readable description of the failure.
    message: str | None = None
    # Position of a syntax error, as reported by json.JSONDecodeError.
    line: int | None = None
    col: int | None = None
    # Offending line with a caret under the error column, when available.
    preview: str | None = None

    @property
    def ok(self) -> bool:
        """True when no error code is set."""
        return self.error is None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SafeJSON:
    """JSON parser with size and nesting limits that tolerates mildly malformed input.

    Parsing is attempted in three stages: ``orjson`` on the raw text, ``orjson``
    on a cleaned-up copy (comments, trailing commas and single quotes repaired),
    and finally the stdlib ``json`` module, whose error carries line/column
    information for diagnostics. The size and depth limits are enforced on the
    raw text and again on the cleaned text, so no stage ever parses a document
    that exceeds them.
    """

    # Messages attached to limit violations, keyed by error code.
    _LIMIT_MESSAGES = {
        "too_large": "JSON exceeds size limit",
        "too_deep": "JSON exceeds depth limit",
    }

    def __init__(self, max_size_bytes: int = 1_000_000, max_depth: int = 20):
        """Configure the limits.

        Args:
            max_size_bytes: Maximum accepted size of the UTF-8 encoded input.
            max_depth: Maximum accepted nesting depth of objects/arrays.
        """
        self.max_size_bytes = max_size_bytes
        self.max_depth = max_depth

    def loads(self, data: str | bytes | bytearray) -> SafeJSONResult:
        """Parse ``data`` and return a result object.

        Args:
            data: JSON document as text or as UTF-8 encoded bytes.

        Returns:
            A result whose ``ok`` is True and ``data`` holds the parsed value, or
            whose ``error`` is ``too_large``, ``too_deep``, ``invalid_syntax``
            (including bytes that are not valid UTF-8) or ``unrecoverable``.
        """
        if isinstance(data, (bytes, bytearray)):
            # Reject oversized payloads before paying for the decode.
            if len(data) > self.max_size_bytes:
                return self._limit_error("too_large")
            try:
                text = data.decode("utf-8", errors="strict")
            except UnicodeDecodeError as exc:
                return SafeJSONResult(
                    error="invalid_syntax",
                    message=f"Input is not valid UTF-8: {exc.reason}",
                )
        else:
            text = str(data)

        violation = self._limit_violation(text)
        if violation is not None:
            return self._limit_error(violation)

        # Step 1: try orjson
        try:
            return SafeJSONResult(data=orjson.loads(text))
        except Exception as exc:  # noqa: BLE001 - any failure falls through to cleanup
            logger.debug("orjson initial parse failed; attempting cleanup fallback: %s", exc)

        # Step 2: cleanup + retry. Stripping comments can expose brackets that the
        # first depth scan skipped as string content, so the limits are enforced
        # again here, before any parser sees the cleaned text.
        cleaned = self._cleanup(text)
        violation = self._limit_violation(cleaned)
        if violation is not None:
            return self._limit_error(violation)

        try:
            return SafeJSONResult(data=orjson.loads(cleaned))
        except Exception as exc:  # noqa: BLE001 - we deliberately log and fall back
            logger.debug(
                "orjson cleaned parse failed; falling back to stdlib for diagnostics: %s", exc
            )

        # Step 3: stdlib json for diagnostics (NaN/Infinity constants become None)
        try:
            return SafeJSONResult(data=json.loads(cleaned, parse_constant=lambda _: None))
        except json.JSONDecodeError as e:
            preview = self._format_error_line(cleaned, e.lineno, e.colno)
            return SafeJSONResult(
                error="invalid_syntax",
                message=e.msg,
                line=e.lineno,
                col=e.colno,
                preview=preview,
            )
        except Exception:  # noqa: BLE001 - last resort; the caller always gets a result
            return SafeJSONResult(error="unrecoverable", message="Unrecoverable JSON")

    def loads_from_text(self, text: str) -> SafeJSONResult:
        """Locate a JSON document inside free-form text and parse it.

        Returns a ``no_json_found`` result when nothing JSON-like is present.
        """
        candidate = self._extract_json(text)
        if not candidate:
            return SafeJSONResult(error="no_json_found", message="No JSON found in text")
        return self.loads(candidate)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _limit_violation(self, text: str) -> ErrorCode | None:
        """Return the code of the first limit ``text`` exceeds, or None if within limits."""
        # surrogatepass: measuring the size must not raise on lone surrogates.
        if len(text.encode("utf-8", errors="surrogatepass")) > self.max_size_bytes:
            return "too_large"
        if self._estimate_depth(text) > self.max_depth:
            return "too_deep"
        return None

    def _limit_error(self, code: ErrorCode) -> SafeJSONResult:
        """Build the result object for a limit violation."""
        return SafeJSONResult(error=code, message=self._LIMIT_MESSAGES[code])

    @staticmethod
    def _cleanup(text: str) -> str:
        """Repair common non-JSON constructs: comments, trailing commas, single quotes.

        NOTE: the substitutions are purely textual and not string-aware, so a
        ``//`` or ``#`` inside a string value (a URL, say) is treated as a
        comment. This only runs after a strict parse has already failed.
        """
        text = re.sub(r"(?m)\s*(//|#).*?$", "", text)  # line comments
        text = re.sub(r",\s*([}\]])", r"\1", text)  # trailing commas
        return re.sub(r"(?<!\\)'([^']*?)'(?!\\)", r'"\1"', text)  # 'x' -> "x"

    @staticmethod
    def _extract_json(text: str) -> str | None:
        """Pull a JSON candidate out of ``text``.

        Tried in order: a json-tagged code fence, any code fence whose content
        starts with ``{`` or ``[``, then the widest ``{...}``/``[...]`` span.
        """
        if not text:
            return None
        match = re.search(r"```json\s*([\s\S]*?)```", text, re.IGNORECASE)
        if match:
            return match.group(1).strip()
        match = re.search(r"```[\w-]*\s*([\s\S]*?)```", text)
        if match and match.group(1).lstrip().startswith(("{", "[")):
            return match.group(1).strip()
        match = re.search(r"[\[{][\s\S]*[\]}]", text)
        return match.group(0).strip() if match else None

    @staticmethod
    def _estimate_depth(text: str) -> int:
        """Return the maximum bracket nesting depth, ignoring brackets inside strings.

        Both double- and single-quoted runs are treated as strings, and
        backslash escapes inside them are honoured.
        """
        depth = 0
        max_depth = 0
        in_str: str | None = None  # the quote character that opened the current string
        esc = False
        for ch in text:
            if in_str:
                if esc:
                    esc = False
                elif ch == "\\":
                    esc = True
                elif ch == in_str:
                    in_str = None
                continue
            if ch in ('"', "'"):
                in_str = ch
            elif ch in "{[":
                depth += 1
                max_depth = max(max_depth, depth)
            elif ch in "]}":
                # Clamp at zero so stray closers cannot drive the depth negative.
                depth = max(depth - 1, 0)
        return max_depth

    @staticmethod
    def _format_error_line(text: str, line_no: int, col_no: int, context: int = 1) -> str | None:
        """Return a snippet of the line with a caret pointing at the error col.

        ``line_no`` and ``col_no`` are 1-based. Returns None when ``line_no`` is
        out of range. ``context`` is accepted for compatibility but currently
        unused.
        """
        lines = text.splitlines()
        if 1 <= line_no <= len(lines):
            line = lines[line_no - 1]
            caret_line = " " * (col_no - 1) + "^"
            return f"{line}\n{caret_line}"
        return None
|