hexdag 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
@@ -0,0 +1,206 @@
1
+ """Runtime warnings for synchronous I/O operations in async contexts.
2
+
3
+ This module provides utilities to detect and warn about blocking I/O operations
4
+ at runtime when executing within async functions or coroutines.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import functools
11
+ import inspect
12
+ import warnings
13
+ from typing import TYPE_CHECKING, Any, TypeVar
14
+
15
+ from hexdag.core.logging import get_logger
16
+
17
+ if TYPE_CHECKING:
18
+ from collections.abc import Callable
19
+
20
+ logger = get_logger(__name__)
21
+
22
+ T = TypeVar("T")
23
+
24
+
25
class AsyncIOWarning(UserWarning):
    """Warning category used when blocking I/O is reported inside an async context.

    Being a ``UserWarning`` subclass, it can be filtered independently with the
    standard ``warnings`` filters.
    """
29
+
30
+
31
def _is_in_async_context() -> bool:
    """Tell whether an asyncio event loop is currently running.

    Returns
    -------
    True if ``asyncio.get_running_loop()`` succeeds, False if it raises
    ``RuntimeError`` (i.e. no loop is running)
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() signals "no running loop" with RuntimeError.
        return False
    else:
        return True
43
+
44
+
45
def warn_sync_io(operation: str, suggestion: str | None = None) -> None:
    """Emit a warning about synchronous I/O in async context.

    Does nothing when no event loop is running. Otherwise the message is sent
    both through ``warnings.warn`` (category ``AsyncIOWarning``) and through the
    module logger at WARNING level.

    The reported location is the first stack frame that does not belong to this
    module, so calls routed through the helpers defined here (the
    ``warn_if_async`` wrapper, ``SyncIOMonitor.check_operation``,
    ``warn_file_open`` and friends) are attributed to the code that triggered
    them. The ``stacklevel`` passed to ``warnings.warn`` is derived from the same
    walk, so the warning's own location agrees with the text of the message.

    Args
    ----
    operation: Description of the blocking operation
    suggestion: Optional suggestion for async alternative
    """
    if not _is_in_async_context():
        return

    location = ""
    stacklevel = 2  # default: attribute the warning to our direct caller
    frame = inspect.currentframe()  # may be None on interpreters without frame support
    caller = None
    try:
        if frame is not None:
            this_file = frame.f_code.co_filename
            caller = frame.f_back
            # Skip helper frames that live in this module.
            while caller is not None and caller.f_code.co_filename == this_file:
                caller = caller.f_back
                stacklevel += 1

            if caller is None:
                # Ran off the top of the stack: fall back to the direct caller.
                stacklevel = 2
            else:
                location = (
                    f" (function: {caller.f_code.co_name}, "
                    f"{caller.f_code.co_filename}:{caller.f_lineno})"
                )
    finally:
        # Frame objects keep their locals alive; drop them explicitly so no
        # reference cycle is left behind (see the note in the inspect docs).
        del frame, caller

    message = f"Blocking I/O operation '{operation}' detected in async context{location}"

    if suggestion:
        message += f". {suggestion}"

    warnings.warn(message, AsyncIOWarning, stacklevel=stacklevel)
    logger.warning(message)
73
+
74
+
75
+ def warn_if_async[T](func: Callable[..., T]) -> Callable[..., T]:
76
+ """Warn if a synchronous function is called in async context.
77
+
78
+ Args
79
+ ----
80
+ func: Function to wrap
81
+
82
+ Returns
83
+ -------
84
+ Wrapped function that warns if called in async context
85
+
86
+ Example
87
+ -------
88
+ >>> @warn_if_async
89
+ ... def sync_database_query(sql: str) -> list:
90
+ ... return connection.execute(sql).fetchall()
91
+ """
92
+
93
+ @functools.wraps(func)
94
+ def wrapper(*args: Any, **kwargs: Any) -> T:
95
+ if _is_in_async_context():
96
+ warn_sync_io(
97
+ f"{func.__name__}()",
98
+ f"Consider using an async version of {func.__name__}",
99
+ )
100
+ return func(*args, **kwargs)
101
+
102
+ return wrapper
103
+
104
+
105
class SyncIOMonitor:
    """Context manager used to flag blocking operations inside a code section.

    On entry the monitor records whether an event loop is running. Nothing is
    intercepted automatically: code inside the block reports operations through
    ``check_operation``, which forwards to ``warn_sync_io`` only when the block
    was entered under a running loop.

    Example
    -------
    >>> async def process_data():
    ...     with SyncIOMonitor("data processing") as monitor:
    ...         monitor.check_operation("process_file()")
    ...         data = process_file()
    """

    def __init__(self, context_name: str = "code block") -> None:
        """Create a monitor.

        Args
        ----
        context_name: Name of the context being monitored (stored on the
            instance; not otherwise used by this class)
        """
        # Set for real in __enter__; False until the block is entered.
        self.is_async = False
        self.context_name = context_name

    def __enter__(self) -> SyncIOMonitor:
        """Record whether an event loop is running and return the monitor."""
        self.is_async = _is_in_async_context()
        return self

    def __exit__(
        self,
        _exc_type: Any,  # noqa: ARG002
        _exc_val: Any,  # noqa: ARG002
        _exc_tb: Any,  # noqa: ARG002
    ) -> None:
        """Leave the block; there is no cleanup and exceptions are not suppressed."""
        return None

    def check_operation(self, operation: str, suggestion: str | None = None) -> None:
        """Report an operation if the block was entered in an async context.

        Args
        ----
        operation: Description of the operation
        suggestion: Optional suggestion for async alternative
        """
        if not self.is_async:
            return
        warn_sync_io(operation, suggestion)
153
+
154
+
155
+ # Commonly monitored operations
156
def warn_file_open(path: str) -> None:
    """Report a synchronous ``open()`` call via ``warn_sync_io``.

    Args
    ----
    path: File path being opened
    """
    operation = f"open('{path}')"
    warn_sync_io(operation, "Use aiofiles.open() for async file I/O")
167
+
168
+
169
def warn_sqlite_connect(db_path: str) -> None:
    """Report a synchronous ``sqlite3.connect()`` call via ``warn_sync_io``.

    Args
    ----
    db_path: Database path
    """
    operation = f"sqlite3.connect('{db_path}')"
    warn_sync_io(operation, "Use aiosqlite.connect() for async database operations")
180
+
181
+
182
def warn_requests_call(method: str, url: str) -> None:
    """Report a synchronous ``requests`` HTTP call via ``warn_sync_io``.

    Args
    ----
    method: HTTP method (GET, POST, etc.); lower-cased in the reported operation
    url: Request URL
    """
    verb = method.lower()
    operation = f"requests.{verb}('{url}')"
    warn_sync_io(operation, "Use aiohttp.ClientSession for async HTTP requests")
194
+
195
+
196
def warn_time_sleep(seconds: float) -> None:
    """Report a synchronous ``time.sleep()`` call via ``warn_sync_io``.

    Args
    ----
    seconds: Sleep duration
    """
    operation = f"time.sleep({seconds})"
    warn_sync_io(operation, "Use await asyncio.sleep() in async functions")
@@ -0,0 +1,78 @@
1
+ """Shared utilities for converting YAML schema strings to Python types.
2
+
3
+ This module provides utilities used by both YAML pipeline builders and node factories
4
+ to convert YAML-friendly type representations to actual Python types.
5
+ """
6
+
7
+ from functools import singledispatch
8
+ from typing import Any
9
+
10
+ # Supported type name mappings
11
# Type names accepted in YAML schemas, mapped to the Python objects they stand for.
VALID_TYPE_NAMES: dict[str, Any] = {
    **{builtin.__name__: builtin for builtin in (str, int, float, bool, list, dict)},
    "Any": Any,
}


@singledispatch
def normalize_schema(schema: Any) -> Any:
    """Return ``schema`` with string type names replaced by Python type objects.

    This is the ``singledispatch`` fallback: anything that is not a ``dict``
    (a type object, a Pydantic model, ...) is returned unchanged. Dict schemas
    are handled by the registered ``dict`` implementation.

    Args:
        schema: Schema in various formats (dict, type, Pydantic model, etc.)

    Returns:
        Normalized schema with actual Python type objects

    Examples:
        >>> normalize_schema({"name": "str"})  # YAML format
        {'name': <class 'str'>}

        >>> normalize_schema({"name": str})  # Already normalized
        {'name': <class 'str'>}

        >>> normalize_schema(str)  # Pass-through for types
        <class 'str'>
    """
    return schema


def _normalize_field(field: Any, spec: Any) -> Any:
    """Resolve one schema entry; ``field`` is only used in error messages."""
    if isinstance(spec, str):
        # A type name such as "int": look it up in the supported table.
        if spec in VALID_TYPE_NAMES:
            return VALID_TYPE_NAMES[spec]
        valid_names = ", ".join(sorted(VALID_TYPE_NAMES.keys()))
        raise ValueError(
            f"Invalid type '{spec}' for field '{field}'. Supported types: {valid_names}"
        )

    if isinstance(spec, dict):
        # Nested schema: normalize it through the dispatcher.
        return normalize_schema(spec)

    if isinstance(spec, type):
        # Already a type object: keep as is.
        return spec

    raise ValueError(
        f"Field '{field}' has invalid value: {spec!r}. "
        f"Expected type name string, type object, or nested schema dict."
    )


@normalize_schema.register(dict)
def _(schema: dict) -> dict[str, type]:
    """Normalize every entry of a dict schema (nested dicts are handled recursively).

    Raises:
        ValueError: If a value is an unknown type name, or is neither a string,
            a dict, nor a type object.
    """
    return {field: _normalize_field(field, spec) for field, spec in schema.items()}


# Backward compatibility alias
convert_yaml_schema = normalize_schema
@@ -0,0 +1,86 @@
1
+ """SQL validation utilities for preventing injection attacks."""
2
+
3
+ import re
4
+
5
+ from hexdag.core.logging import get_logger
6
+
7
+ logger = get_logger(__name__)
8
+
9
+
10
def validate_sql_identifier(
    identifier: str,
    identifier_type: str = "identifier",
    raise_on_invalid: bool = False,
) -> bool:
    """Validate SQL identifier to prevent injection attacks.

    SQL identifiers (table names, column names, etc.) cannot be passed as bound
    parameters, so they must be checked before being interpolated into a
    statement. This function accepts only a conservative ASCII character set.

    Valid identifiers:
    - Start with a letter (a-z, A-Z) or underscore (_)
    - Contain only letters, numbers, and underscores
    - Examples: "users", "user_data", "Table123", "_private"

    Invalid identifiers:
    - Empty strings and non-string values
    - Start with numbers: "123table"
    - Contain special characters: "user-data", "user.table", "user name"
    - Contain any whitespace, including a trailing newline

    Note that reserved words such as "SELECT" or "DROP" match the allowed
    character set and are therefore reported as valid; this function does not
    check identifiers against a keyword list.

    Parameters
    ----------
    identifier : str
        The SQL identifier to validate (e.g., table name, column name)
    identifier_type : str, optional
        Human-readable type name for error messages (default: "identifier")
        Examples: "table", "column", "database"
    raise_on_invalid : bool, optional
        If True, raises ValueError on invalid identifier
        If False, logs warning and returns False (default: False)

    Returns
    -------
    bool
        True if identifier is valid, False otherwise

    Raises
    ------
    ValueError
        If raise_on_invalid=True and identifier is invalid

    Examples
    --------
    >>> validate_sql_identifier("users")
    True

    >>> validate_sql_identifier("user_data")
    True

    >>> validate_sql_identifier("123invalid")
    False

    >>> validate_sql_identifier("user-data")
    False

    >>> validate_sql_identifier("user.table", "table", raise_on_invalid=True)  # doctest: +SKIP
    Traceback (most recent call last):
        ...
    ValueError: Invalid table 'user.table'. Must start with letter/underscore...
    """
    # fullmatch() instead of match() with "^...$": "$" also matches just before a
    # trailing newline, which would let an identifier ending in "\n" through.
    is_valid = isinstance(identifier, str) and (
        re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", identifier) is not None
    )
    if is_valid:
        return True

    msg = (
        f"Invalid {identifier_type} '{identifier}'. "
        "Must start with letter/underscore and contain only "
        "letters, numbers, and underscores."
    )

    if raise_on_invalid:
        raise ValueError(msg)

    logger.warning(msg)
    return False
@@ -0,0 +1,148 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Any, Literal
6
+
7
+ import orjson
8
+
9
# Closed set of failure categories carried by SafeJSONResult.error.
ErrorCode = Literal["too_large", "too_deep", "invalid_syntax", "unrecoverable", "no_json_found"]
16
+
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ @dataclass
22
+ class SafeJSONResult:
23
+ data: Any | None = None
24
+ error: ErrorCode | None = None
25
+ message: str | None = None
26
+ line: int | None = None
27
+ col: int | None = None
28
+ preview: str | None = None
29
+
30
+ @property
31
+ def ok(self) -> bool:
32
+ return self.error is None
33
+
34
+
35
+ class SafeJSON:
36
+ def __init__(self, max_size_bytes: int = 1_000_000, max_depth: int = 20):
37
+ self.max_size_bytes = max_size_bytes
38
+ self.max_depth = max_depth
39
+
40
+ def loads(self, data: str | bytes | bytearray) -> SafeJSONResult:
41
+ text = (
42
+ data.decode("utf-8", errors="strict")
43
+ if isinstance(data, (bytes, bytearray))
44
+ else str(data)
45
+ )
46
+
47
+ if len(text.encode("utf-8")) > self.max_size_bytes:
48
+ return SafeJSONResult(error="too_large", message="JSON exceeds size limit")
49
+
50
+ if self._estimate_depth(text) > self.max_depth:
51
+ return SafeJSONResult(error="too_deep", message="JSON exceeds depth limit")
52
+
53
+ # Step 1: try orjson
54
+ try:
55
+ return SafeJSONResult(data=orjson.loads(text))
56
+ except Exception as exc:
57
+ logger.debug("orjson initial parse failed; attempting cleanup fallback: %s", exc)
58
+
59
+ # Step 2: cleanup + retry
60
+ cleaned = self._cleanup(text)
61
+ if (
62
+ len(cleaned.encode("utf-8")) <= self.max_size_bytes
63
+ and self._estimate_depth(cleaned) <= self.max_depth
64
+ ):
65
+ try:
66
+ return SafeJSONResult(data=orjson.loads(cleaned))
67
+ except Exception as exc: # noqa: BLE001 - we deliberately log and fall back
68
+ logger.debug(
69
+ "orjson cleaned parse failed; falling back to stdlib for diagnostics: %s", exc
70
+ )
71
+
72
+ # Step 3: stdlib json for diagnostics
73
+ try:
74
+ return SafeJSONResult(data=json.loads(cleaned, parse_constant=lambda _: None))
75
+ except json.JSONDecodeError as e:
76
+ preview = self._format_error_line(cleaned, e.lineno, e.colno)
77
+ return SafeJSONResult(
78
+ error="invalid_syntax",
79
+ message=e.msg,
80
+ line=e.lineno,
81
+ col=e.colno,
82
+ preview=preview,
83
+ )
84
+ except Exception:
85
+ return SafeJSONResult(error="unrecoverable", message="Unrecoverable JSON")
86
+
87
+ def loads_from_text(self, text: str) -> SafeJSONResult:
88
+ candidate = self._extract_json(text)
89
+ if not candidate:
90
+ return SafeJSONResult(error="no_json_found", message="No JSON found in text")
91
+ return self.loads(candidate)
92
+
93
+ # ------------------------------------------------------------------
94
+ # Helpers
95
+ # ------------------------------------------------------------------
96
+
97
+ @staticmethod
98
+ def _cleanup(text: str) -> str:
99
+ text = re.sub(r"(?m)\s*(//|#).*?$", "", text)
100
+ text = re.sub(r",\s*([}\]])", r"\1", text)
101
+ return re.sub(r"(?<!\\)'([^']*?)'(?!\\)", r'"\1"', text)
102
+
103
+ @staticmethod
104
+ def _extract_json(text: str) -> str | None:
105
+ if not text:
106
+ return None
107
+ match = re.search(r"```json\s*([\s\S]*?)```", text, re.IGNORECASE)
108
+ if match:
109
+ return match.group(1).strip()
110
+ match = re.search(r"```[\w-]*\s*([\s\S]*?)```", text)
111
+ if match and match.group(1).lstrip().startswith(("{", "[")):
112
+ return match.group(1).strip()
113
+ match = re.search(r"[\[{][\s\S]*[\]}]", text)
114
+ return match.group(0).strip() if match else None
115
+
116
+ @staticmethod
117
+ def _estimate_depth(text: str) -> int:
118
+ depth = 0
119
+ max_depth = 0
120
+ in_str: str | None = None
121
+ esc = False
122
+ for ch in text:
123
+ if in_str:
124
+ if esc:
125
+ esc = False
126
+ elif ch == "\\":
127
+ esc = True
128
+ elif ch == in_str:
129
+ in_str = None
130
+ continue
131
+ if ch in ('"', "'"):
132
+ in_str = ch
133
+ elif ch in "{[":
134
+ depth += 1
135
+ max_depth = max(max_depth, depth)
136
+ elif ch in "]}":
137
+ depth = max(depth - 1, 0)
138
+ return max_depth
139
+
140
+ @staticmethod
141
+ def _format_error_line(text: str, line_no: int, col_no: int, context: int = 1) -> str | None:
142
+ """Return a snippet of the line with a caret pointing at the error col."""
143
+ lines = text.splitlines()
144
+ if 1 <= line_no <= len(lines):
145
+ line = lines[line_no - 1]
146
+ caret_line = " " * (col_no - 1) + "^"
147
+ return f"{line}\n{caret_line}"
148
+ return None