hexdag 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
hexdag/core/logging.py ADDED
@@ -0,0 +1,449 @@
1
+ """Centralized logging configuration for hexDAG using Loguru.
2
+
3
+ Provides consistent logging across the framework with support for:
4
+ - Multiple output formats (console, JSON, structured)
5
+ - Environment-based configuration (dev/prod)
6
+ - Performance monitoring
7
+ - Structured context logging
8
+ - Integration with observability systems
9
+ - Idempotent configuration
10
+
11
+ Examples
12
+ --------
13
+ Basic usage:
14
+
15
+ >>> from hexdag.core.logging import get_logger
16
+ >>> logger = get_logger(__name__)
17
+ >>> logger.info("Pipeline started", pipeline_id="123")
18
+
19
+ Configure logging globally::
20
+
21
+ from hexdag.core.logging import configure_logging
22
+ configure_logging(level="DEBUG", format="json")
23
+
24
+ Custom configuration:
25
+
26
+ >>> from hexdag.core.logging import configure_logging
27
+ >>> configure_logging(
28
+ ... level="INFO",
29
+ ... format="structured",
30
+ ... output_file="hexdag.log",
31
+ ... )
32
+ """
33
+
34
+ import contextvars
35
+ import logging
36
+ import os
37
+ import sys
38
+ from contextlib import suppress
39
+ from functools import lru_cache
40
+ from pathlib import Path
41
+ from typing import TYPE_CHECKING, Literal
42
+
43
+ if TYPE_CHECKING:
44
+ import types
45
+
46
+ from hexdag.core.types import Logger
47
+
48
+ from loguru import logger
49
+ from rich.logging import RichHandler
50
+
51
+ LogLevel = Literal["TRACE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
52
+ LogFormat = Literal["console", "json", "structured", "rich", "dual"]
53
+
54
+ _CURRENT_CONFIG: dict | None = None
55
+ _HANDLER_IDS: list[int] = []
56
+
57
+ # Correlation ID context variable for request tracing
58
+ correlation_id: contextvars.ContextVar[str] = contextvars.ContextVar("correlation_id", default="-")
59
+
60
+
61
def configure_logging(
    level: LogLevel = "INFO",
    format: LogFormat = "structured",
    output_file: str | Path | None = None,
    use_color: bool = True,
    include_timestamp: bool = True,
    force_reconfigure: bool = False,
    use_rich: bool = False,
    dual_sink: bool = False,
    enable_stdlib_bridge: bool = False,
    backtrace: bool = True,
    diagnose: bool = True,
) -> None:
    """Configure global logging for the hexDAG framework.

    The call is idempotent: repeating it with the same settings returns
    immediately without touching the installed handlers. When the settings
    differ (or ``force_reconfigure`` is set), the handlers previously added by
    this function are removed and a new set is installed.

    Parameters
    ----------
    level : LogLevel, default="INFO"
        Minimum log level to output (TRACE, DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format : LogFormat, default="structured"
        Output format:
        - "console": Plain stderr output without colors
        - "json": One JSON document per record on stderr (Loguru ``serialize=True``)
        - "structured": Loguru format with timestamp, level and source location
        - "rich": Rich console handler
        - "dual": Rich console handler plus JSON on stdout
        Any other value is treated like "console".
    output_file : str | Path | None, default=None
        Optional file path to additionally write JSON logs to. Parent
        directories are created; the file rotates at 10 MB, is kept for one
        week and rotated files are zipped.
    use_color : bool, default=True
        Use ANSI colors in the structured format (only when stderr is a TTY)
    include_timestamp : bool, default=True
        Include timestamp in log output
    force_reconfigure : bool, default=False
        Reconfigure even if the settings are unchanged
    use_rich : bool, default=False
        Use the Rich handler; takes precedence over ``format`` unless dual-sink
        mode is selected
    dual_sink : bool, default=False
        Enable dual-sink mode (Rich handler + JSON on stdout); takes precedence
        over ``use_rich`` and ``format``
    enable_stdlib_bridge : bool, default=False
        Route stdlib ``logging`` records through Loguru
    backtrace : bool, default=True
        Passed to every sink (disable in production for security)
    diagnose : bool, default=True
        Passed to every sink; shows variable values in tracebacks (disable in
        production for security)

    Examples
    --------
    Development setup with Rich::

        configure_logging(level="DEBUG", format="rich", use_rich=True)

    Dual-sink setup (Rich console + JSON for aggregation)::

        configure_logging(level="INFO", dual_sink=True, use_rich=True)

    Production setup::

        configure_logging(
            level="INFO",
            format="json",
            output_file="/var/log/hexdag/app.log",
            backtrace=True,
            diagnose=False,  # Disable for security
        )

    Testing setup::

        configure_logging(level="WARNING", format="console")
    """
    global _CURRENT_CONFIG

    current_config = {
        "level": level,
        "format": format,
        "output_file": str(output_file) if output_file else None,
        "use_color": use_color,
        "include_timestamp": include_timestamp,
        "use_rich": use_rich,
        "dual_sink": dual_sink,
        "enable_stdlib_bridge": enable_stdlib_bridge,
        "backtrace": backtrace,
        "diagnose": diagnose,
    }

    if not force_reconfigure and current_config == _CURRENT_CONFIG:
        return

    # Invalidate the remembered configuration before tearing anything down.
    # If adding a sink below raises (e.g. unknown level), a later call with the
    # previous settings must not hit the early return above while no handlers
    # are installed.
    _CURRENT_CONFIG = None

    # Remove only the handlers this module added, so handlers installed by
    # pytest or other frameworks are left alone.
    # NOTE(review): Loguru's own default stderr handler is not removed here;
    # confirm it is removed elsewhere, otherwise records may be emitted twice.
    for handler_id in _HANDLER_IDS:
        with suppress(ValueError):
            logger.remove(handler_id)
    _HANDLER_IDS.clear()

    def _add_sink(sink, **options) -> None:
        """Register a sink with the shared options and remember its handler ID."""
        _HANDLER_IDS.append(
            logger.add(sink, level=level, backtrace=backtrace, diagnose=diagnose, **options)
        )

    def _make_rich_handler() -> RichHandler:
        """Build the Rich handler used by the "rich" and "dual" modes."""
        return RichHandler(
            rich_tracebacks=True,
            markup=True,
            show_time=include_timestamp,
            show_level=True,
            show_path=True,
        )

    if dual_sink or format == "dual":
        # Dual-sink mode: human-readable Rich output plus JSON on stdout.
        _add_sink(_make_rich_handler(), format="{message}")
        _add_sink(sys.stdout, serialize=True)

    elif use_rich or format == "rich":
        # Rich-only mode.
        _add_sink(_make_rich_handler(), format="{message}")

    elif format == "json":
        # One JSON document per record, produced by Loguru's serialize option.
        _add_sink(sys.stderr, serialize=True)

    elif format == "structured":
        colorize = use_color and sys.stderr.isatty()
        timestamp_fmt = "<green>{time:YYYY-MM-DD HH:mm:ss}</green> " if include_timestamp else ""
        level_fmt = "<level>{level: <8}</level>" if colorize else "{level: <8}"
        # The correlation ID is not rendered by this format string.
        structured_format = (
            f"{timestamp_fmt}[{level_fmt}] "
            "<cyan>{name}:{function}:{line}</cyan> | <level>{message}</level>"
        )
        _add_sink(sys.stderr, format=structured_format, colorize=colorize)

    else:  # "console" (and any unrecognised format value)
        timestamp_fmt = "{time:YYYY-MM-DD HH:mm:ss} " if include_timestamp else ""
        console_format = f"{timestamp_fmt}{{level: <8}} | {{name}} | {{message}}"
        _add_sink(sys.stderr, format=console_format, colorize=False)

    if output_file:
        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # File output always uses JSON for easier parsing.
        _add_sink(
            output_path,
            serialize=True,
            rotation="10 MB",  # Rotate when the file reaches 10 MB
            retention="1 week",  # Keep logs for one week
            compression="zip",  # Compress rotated logs
        )

    if enable_stdlib_bridge:
        enable_stdlib_logging_bridge()

    # Only remember the configuration once every sink was installed.
    _CURRENT_CONFIG = current_config
276
+
277
+
278
@lru_cache(maxsize=256)
def get_logger(name: str) -> "Logger":
    """Get a logger instance with the given name (cached for performance).

    This is the recommended way to get loggers in hexDAG. The returned logger
    is bound with ``module=name`` and carries a patcher that copies the
    current correlation ID into ``record["extra"]["cid"]`` for every record.

    Parameters
    ----------
    name : str
        Logger name, typically __name__ from the calling module

    Returns
    -------
    loguru.Logger
        Logger bound with the module name and the correlation-ID patcher

    Examples
    --------
    >>> from hexdag.core.logging import get_logger, set_correlation_id
    >>> set_correlation_id("req-123")
    >>> logger = get_logger(__name__)
    >>> logger.info("Starting orchestrator")  # extra["cid"] == "req-123"
    >>> logger.debug("Processing node", node_id="abc123")

    Notes
    -----
    - If configure_logging() hasn't been called, logging is configured lazily
    - The correlation ID is read at log time, so a cached logger still picks
      up the value set in the current context
    - A ``cid`` that is already present in the record's extra (for example via
      ``logger.bind(cid=...)``) is left untouched
    - Logger instances are cached per ``name``
    """
    # Lazy initialization - only configure if not already done
    _ensure_configured()

    def _attach_correlation_id(record) -> None:
        # Runs for each record, so the ContextVar is read when logging happens
        # rather than when this (cached) logger was created.
        record["extra"].setdefault("cid", correlation_id.get())

    return logger.bind(module=name).patch(_attach_correlation_id)
316
+
317
+
318
@lru_cache(maxsize=128)
def get_logger_for_component(component_type: str, component_name: str) -> "Logger":
    """Get a logger for a specific component instance (cached for performance).

    Useful for adapters, nodes, and other components that need
    instance-specific logging. The returned logger also copies the current
    correlation ID into ``record["extra"]["cid"]`` for every record.

    Parameters
    ----------
    component_type : str
        Type of component (e.g., "adapter", "node", "observer")
    component_name : str
        Specific component name (e.g., "openai_llm", "postgres_db")

    Returns
    -------
    loguru.Logger
        Logger bound with ``module`` set to a hierarchical name like
        "hexdag.adapter.openai_llm", plus ``component_type`` and
        ``component_name``

    Examples
    --------
    >>> logger = get_logger_for_component("adapter", "openai_llm")
    >>> logger.info("LLM adapter initialized")
    """
    # Lazy initialization - only configure if not already done
    _ensure_configured()

    def _attach_correlation_id(record) -> None:
        # Evaluated per record so cached loggers see the current context value;
        # an explicitly bound ``cid`` wins.
        record["extra"].setdefault("cid", correlation_id.get())

    bound = logger.bind(
        module=f"hexdag.{component_type}.{component_name}",
        component_type=component_type,
        component_name=component_name,
    )
    return bound.patch(_attach_correlation_id)
348
+
349
+
350
def enable_stdlib_logging_bridge() -> None:
    """Enable interception of stdlib logging for third-party libraries.

    Installs a handler on the stdlib root logger (replacing any existing root
    handlers via ``force=True``) that forwards every record to Loguru, so logs
    from dependencies share the configured sinks and formatting.

    Examples
    --------
    >>> from hexdag.core.logging import configure_logging, enable_stdlib_logging_bridge
    >>> configure_logging(level="INFO", format="structured")
    >>> enable_stdlib_logging_bridge()  # Now all stdlib logging goes through loguru
    """

    class InterceptHandler(logging.Handler):
        """Stdlib handler that re-emits each record through Loguru."""

        def emit(self, record: logging.LogRecord) -> None:
            # Use the Loguru level of the same name when it exists, otherwise
            # fall back to the numeric stdlib level.
            level: str | int
            try:
                level = logger.level(record.levelname).name
            except ValueError:
                level = record.levelno

            # Walk up from this frame past the stdlib logging frames to find
            # the caller that issued the log call. Starting at the current
            # frame (instead of a fixed offset) avoids a ValueError when the
            # stack is shallower than expected.
            frame: types.FrameType | None = sys._getframe(0)
            depth = 0
            while frame and (depth == 0 or frame.f_code.co_filename == logging.__file__):
                frame = frame.f_back
                depth += 1

            logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())

    # Intercept all stdlib logging: level 0 lets every record through and
    # force=True drops handlers already attached to the root logger.
    logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)
382
+
383
+
384
def set_correlation_id(cid: str) -> None:
    """Store ``cid`` as the correlation ID of the current context.

    The value is written to the module-level ``correlation_id`` context
    variable and can be read back with ``get_correlation_id()``.

    Parameters
    ----------
    cid : str
        Correlation ID (e.g., request ID, trace ID, session ID)

    Examples
    --------
    >>> from hexdag.core.logging import set_correlation_id, get_correlation_id
    >>> set_correlation_id("req-abc-123")
    >>> get_correlation_id()
    'req-abc-123'
    """
    correlation_id.set(cid)
403
+
404
+
405
def get_correlation_id() -> str:
    """Return the correlation ID of the current context.

    Returns
    -------
    str
        The value held by the ``correlation_id`` context variable; its
        default is "-" when nothing has been set.

    Examples
    --------
    >>> from hexdag.core.logging import get_correlation_id, set_correlation_id
    >>> get_correlation_id()
    '-'
    >>> set_correlation_id('req-abc-123')
    >>> get_correlation_id()
    'req-abc-123'
    """
    current = correlation_id.get()
    return current
423
+
424
+
425
def clear_correlation_id() -> None:
    """Reset the correlation ID of the current context to the "-" placeholder.

    Examples
    --------
    >>> from hexdag.core.logging import clear_correlation_id
    >>> clear_correlation_id()
    """
    # "-" is the same value the context variable uses as its default.
    correlation_id.set("-")
434
+
435
+
436
def _ensure_configured() -> None:
    """Ensure logging has at least basic configuration (lazy initialization).

    Does nothing when a configuration is already recorded. Otherwise the
    level and format are taken from the ``HEXDAG_LOG_LEVEL`` (default "INFO")
    and ``HEXDAG_LOG_FORMAT`` (default "structured") environment variables.
    If those values are rejected with a ``ValueError``, the built-in defaults
    are applied instead and a warning is logged, so a mistyped environment
    variable cannot make the first logger lookup fail.

    Users can call configure_logging() explicitly for custom settings.
    """
    if _CURRENT_CONFIG is not None:
        return

    # Environment values are normalised but otherwise passed through as-is.
    level = os.getenv("HEXDAG_LOG_LEVEL", "INFO").strip().upper()
    format_type = os.getenv("HEXDAG_LOG_FORMAT", "structured").strip().lower()

    try:
        configure_logging(level=level, format=format_type)  # type: ignore
    except ValueError:
        # Fall back to the defaults rather than propagating the error to
        # whichever module happened to request a logger first.
        configure_logging()
        logger.warning(
            "Invalid logging environment configuration "
            "(HEXDAG_LOG_LEVEL={!r}, HEXDAG_LOG_FORMAT={!r}); using defaults",
            level,
            format_type,
        )
@@ -0,0 +1,17 @@
1
+ """Standard Pydantic models for HexDAG.
2
+
3
+ This module provides common Pydantic models for use in DAG nodes.
4
+ These models serve as data contracts between nodes and enable
5
+ type safety, validation, and serialization.
6
+ """
7
+
8
+ from .base import DataReference, FileData, InMemoryData, JsonData, ListData, StringData
9
+
10
+ __all__ = [
11
+ "DataReference",
12
+ "FileData",
13
+ "InMemoryData",
14
+ "StringData",
15
+ "JsonData",
16
+ "ListData",
17
+ ]
@@ -0,0 +1,138 @@
1
+ """Base Pydantic models for HexDAG nodes."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any, TypeVar
5
+
6
+ from pydantic import BaseModel, ConfigDict, Field
7
+
8
+ T = TypeVar("T")
9
+
10
+
11
class DataReference(BaseModel):
    """Immutable pointer to data that is stored outside the process.

    Intended for data kept in external systems (Spark Delta/Parquet tables,
    cloud object storage such as S3/GCS, ...) rather than held in memory.

    Attributes
    ----------
    location : str
        URI of the data (e.g., "s3://bucket/path", "delta://table", "file:///path")
    format : str
        Data format (e.g., "parquet", "delta", "csv", "json")
    schema_info : dict[str, str] | None
        Column names and types
    size_bytes : int | None
        Size of the data in bytes
    row_count : int | None
        Number of rows (for tabular data)
    created_at : str | None
        ISO timestamp of when data was created
    """

    # Instances are frozen: attributes cannot be reassigned after creation.
    model_config = ConfigDict(frozen=True)

    location: str
    format: str
    # Named ``schema_info`` so it does not clash with ``BaseModel.schema``.
    schema_info: dict[str, str] | None = None
    size_bytes: int | None = None
    row_count: int | None = None
    created_at: str | None = None
41
+
42
+
43
class FileData(BaseModel):
    """Immutable description of a file on disk.

    Attributes
    ----------
    path : Path
        Path to the file
    encoding : str
        File encoding (default: utf-8)
    mime_type : str | None
        MIME type of the file
    size_bytes : int | None
        File size in bytes
    """

    # Frozen model: attributes cannot be reassigned after creation.
    model_config = ConfigDict(frozen=True)

    path: Path
    encoding: str = "utf-8"
    mime_type: str | None = None
    size_bytes: int | None = None
64
+
65
+
66
+ class InMemoryData[T](BaseModel):
67
+ """Container for small data that stays in memory.
68
+
69
+ Use this for configuration, metadata, or small datasets
70
+ that don't need to be offloaded.
71
+
72
+ Attributes
73
+ ----------
74
+ data : T
75
+ The actual data (any type)
76
+ metadata : dict[str, Any]
77
+ Optional metadata about the data
78
+ """
79
+
80
+ data: T
81
+ metadata: dict[str, Any] = Field(default_factory=dict)
82
+
83
+ model_config = ConfigDict(arbitrary_types_allowed=True)
84
+
85
+
86
class StringData(BaseModel):
    """Immutable container for a single string value.

    Attributes
    ----------
    value : str
        The string value
    encoding : str
        String encoding (default: utf-8)
    """

    # Frozen model: attributes cannot be reassigned after creation.
    model_config = ConfigDict(frozen=True)

    value: str
    encoding: str = "utf-8"
101
+
102
+
103
class JsonData(BaseModel):
    """Container for JSON data held as a dictionary.

    Attributes
    ----------
    data : dict[str, Any]
        The JSON data as a dictionary
    schema_version : str | None
        Optional schema version identifier
    """

    # Frozen model: attributes cannot be reassigned after creation.
    model_config = ConfigDict(frozen=True)

    data: dict[str, Any]
    schema_version: str | None = None
118
+
119
+
120
+ class ListData[T](BaseModel):
121
+ """Container for list data.
122
+
123
+ Attributes
124
+ ----------
125
+ items : list[T]
126
+ The list of items
127
+ count : int | None
128
+ Optional cached count (useful for large lists)
129
+ """
130
+
131
+ items: list[T]
132
+
133
+ @property
134
+ def count(self) -> int:
135
+ """Get the count of items."""
136
+ return len(self.items)
137
+
138
+ model_config = ConfigDict(frozen=True)
@@ -0,0 +1,46 @@
1
+ """Orchestration layer for DAG execution.
2
+
3
+ This package contains all orchestrator-related components including the main
4
+ Orchestrator class, configuration, builders, and execution components.
5
+
6
+ The orchestration layer is responsible for:
7
+ - Executing DirectedGraphs in topological order
8
+ - Managing concurrent execution with wave-based parallelism
9
+ - Providing checkpoint/resume capabilities
10
+ - Supporting per-node port configuration
11
+ - Coordinating policies and events
12
+
13
+ Main exports
14
+ ------------
15
+ BodyExecutor : Imported from ``body_executor``
16
+ BodyExecutorError : Imported from ``body_executor``
17
+ CheckpointManager : Imported from ``components``
18
+ CheckpointState : Imported from ``models``
19
+ NodeExecutionContext : Imported from ``models``
20
+ OrchestratorConfig : Configuration dataclass
21
+
22
+ Examples
23
+ --------
24
+ Example usage::
25
+
26
+ from hexdag.core.orchestration import Orchestrator, OrchestratorConfig
27
+ config = OrchestratorConfig(max_concurrent_nodes=5)
28
+ orchestrator = Orchestrator(config=config)
29
+ """
30
+
31
+ from hexdag.core.orchestration.body_executor import BodyExecutor, BodyExecutorError
32
+ from hexdag.core.orchestration.components import CheckpointManager
33
+ from hexdag.core.orchestration.models import (
34
+ CheckpointState,
35
+ NodeExecutionContext,
36
+ OrchestratorConfig,
37
+ )
38
+
39
+ __all__ = [
40
+ "BodyExecutor",
41
+ "BodyExecutorError",
42
+ "CheckpointManager",
43
+ "CheckpointState",
44
+ "NodeExecutionContext",
45
+ "OrchestratorConfig",
46
+ ]