hexdag 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. hexdag/__init__.py +116 -0
  2. hexdag/__main__.py +30 -0
  3. hexdag/adapters/executors/__init__.py +5 -0
  4. hexdag/adapters/executors/local_executor.py +316 -0
  5. hexdag/builtin/__init__.py +6 -0
  6. hexdag/builtin/adapters/__init__.py +51 -0
  7. hexdag/builtin/adapters/anthropic/__init__.py +5 -0
  8. hexdag/builtin/adapters/anthropic/anthropic_adapter.py +151 -0
  9. hexdag/builtin/adapters/database/__init__.py +6 -0
  10. hexdag/builtin/adapters/database/csv/csv_adapter.py +249 -0
  11. hexdag/builtin/adapters/database/pgvector/__init__.py +5 -0
  12. hexdag/builtin/adapters/database/pgvector/pgvector_adapter.py +478 -0
  13. hexdag/builtin/adapters/database/sqlalchemy/sqlalchemy_adapter.py +252 -0
  14. hexdag/builtin/adapters/database/sqlite/__init__.py +5 -0
  15. hexdag/builtin/adapters/database/sqlite/sqlite_adapter.py +410 -0
  16. hexdag/builtin/adapters/local/README.md +59 -0
  17. hexdag/builtin/adapters/local/__init__.py +7 -0
  18. hexdag/builtin/adapters/local/local_observer_manager.py +696 -0
  19. hexdag/builtin/adapters/memory/__init__.py +47 -0
  20. hexdag/builtin/adapters/memory/file_memory_adapter.py +297 -0
  21. hexdag/builtin/adapters/memory/in_memory_memory.py +216 -0
  22. hexdag/builtin/adapters/memory/schemas.py +57 -0
  23. hexdag/builtin/adapters/memory/session_memory.py +178 -0
  24. hexdag/builtin/adapters/memory/sqlite_memory_adapter.py +215 -0
  25. hexdag/builtin/adapters/memory/state_memory.py +280 -0
  26. hexdag/builtin/adapters/mock/README.md +89 -0
  27. hexdag/builtin/adapters/mock/__init__.py +15 -0
  28. hexdag/builtin/adapters/mock/hexdag.toml +50 -0
  29. hexdag/builtin/adapters/mock/mock_database.py +225 -0
  30. hexdag/builtin/adapters/mock/mock_embedding.py +223 -0
  31. hexdag/builtin/adapters/mock/mock_llm.py +177 -0
  32. hexdag/builtin/adapters/mock/mock_tool_adapter.py +192 -0
  33. hexdag/builtin/adapters/mock/mock_tool_router.py +232 -0
  34. hexdag/builtin/adapters/openai/__init__.py +5 -0
  35. hexdag/builtin/adapters/openai/openai_adapter.py +634 -0
  36. hexdag/builtin/adapters/secret/__init__.py +7 -0
  37. hexdag/builtin/adapters/secret/local_secret_adapter.py +248 -0
  38. hexdag/builtin/adapters/unified_tool_router.py +280 -0
  39. hexdag/builtin/macros/__init__.py +17 -0
  40. hexdag/builtin/macros/conversation_agent.py +390 -0
  41. hexdag/builtin/macros/llm_macro.py +151 -0
  42. hexdag/builtin/macros/reasoning_agent.py +423 -0
  43. hexdag/builtin/macros/tool_macro.py +380 -0
  44. hexdag/builtin/nodes/__init__.py +38 -0
  45. hexdag/builtin/nodes/_discovery.py +123 -0
  46. hexdag/builtin/nodes/agent_node.py +696 -0
  47. hexdag/builtin/nodes/base_node_factory.py +242 -0
  48. hexdag/builtin/nodes/composite_node.py +926 -0
  49. hexdag/builtin/nodes/data_node.py +201 -0
  50. hexdag/builtin/nodes/expression_node.py +487 -0
  51. hexdag/builtin/nodes/function_node.py +454 -0
  52. hexdag/builtin/nodes/llm_node.py +491 -0
  53. hexdag/builtin/nodes/loop_node.py +920 -0
  54. hexdag/builtin/nodes/mapped_input.py +518 -0
  55. hexdag/builtin/nodes/port_call_node.py +269 -0
  56. hexdag/builtin/nodes/tool_call_node.py +195 -0
  57. hexdag/builtin/nodes/tool_utils.py +390 -0
  58. hexdag/builtin/prompts/__init__.py +68 -0
  59. hexdag/builtin/prompts/base.py +422 -0
  60. hexdag/builtin/prompts/chat_prompts.py +303 -0
  61. hexdag/builtin/prompts/error_correction_prompts.py +320 -0
  62. hexdag/builtin/prompts/tool_prompts.py +160 -0
  63. hexdag/builtin/tools/builtin_tools.py +84 -0
  64. hexdag/builtin/tools/database_tools.py +164 -0
  65. hexdag/cli/__init__.py +17 -0
  66. hexdag/cli/__main__.py +7 -0
  67. hexdag/cli/commands/__init__.py +27 -0
  68. hexdag/cli/commands/build_cmd.py +812 -0
  69. hexdag/cli/commands/create_cmd.py +208 -0
  70. hexdag/cli/commands/docs_cmd.py +293 -0
  71. hexdag/cli/commands/generate_types_cmd.py +252 -0
  72. hexdag/cli/commands/init_cmd.py +188 -0
  73. hexdag/cli/commands/pipeline_cmd.py +494 -0
  74. hexdag/cli/commands/plugin_dev_cmd.py +529 -0
  75. hexdag/cli/commands/plugins_cmd.py +441 -0
  76. hexdag/cli/commands/studio_cmd.py +101 -0
  77. hexdag/cli/commands/validate_cmd.py +221 -0
  78. hexdag/cli/main.py +84 -0
  79. hexdag/core/__init__.py +83 -0
  80. hexdag/core/config/__init__.py +20 -0
  81. hexdag/core/config/loader.py +479 -0
  82. hexdag/core/config/models.py +150 -0
  83. hexdag/core/configurable.py +294 -0
  84. hexdag/core/context/__init__.py +37 -0
  85. hexdag/core/context/execution_context.py +378 -0
  86. hexdag/core/docs/__init__.py +26 -0
  87. hexdag/core/docs/extractors.py +678 -0
  88. hexdag/core/docs/generators.py +890 -0
  89. hexdag/core/docs/models.py +120 -0
  90. hexdag/core/domain/__init__.py +10 -0
  91. hexdag/core/domain/dag.py +1225 -0
  92. hexdag/core/exceptions.py +234 -0
  93. hexdag/core/expression_parser.py +569 -0
  94. hexdag/core/logging.py +449 -0
  95. hexdag/core/models/__init__.py +17 -0
  96. hexdag/core/models/base.py +138 -0
  97. hexdag/core/orchestration/__init__.py +46 -0
  98. hexdag/core/orchestration/body_executor.py +481 -0
  99. hexdag/core/orchestration/components/__init__.py +97 -0
  100. hexdag/core/orchestration/components/adapter_lifecycle_manager.py +113 -0
  101. hexdag/core/orchestration/components/checkpoint_manager.py +134 -0
  102. hexdag/core/orchestration/components/execution_coordinator.py +360 -0
  103. hexdag/core/orchestration/components/health_check_manager.py +176 -0
  104. hexdag/core/orchestration/components/input_mapper.py +143 -0
  105. hexdag/core/orchestration/components/lifecycle_manager.py +583 -0
  106. hexdag/core/orchestration/components/node_executor.py +377 -0
  107. hexdag/core/orchestration/components/secret_manager.py +202 -0
  108. hexdag/core/orchestration/components/wave_executor.py +158 -0
  109. hexdag/core/orchestration/constants.py +17 -0
  110. hexdag/core/orchestration/events/README.md +312 -0
  111. hexdag/core/orchestration/events/__init__.py +104 -0
  112. hexdag/core/orchestration/events/batching.py +330 -0
  113. hexdag/core/orchestration/events/decorators.py +139 -0
  114. hexdag/core/orchestration/events/events.py +573 -0
  115. hexdag/core/orchestration/events/observers/__init__.py +30 -0
  116. hexdag/core/orchestration/events/observers/core_observers.py +690 -0
  117. hexdag/core/orchestration/events/observers/models.py +111 -0
  118. hexdag/core/orchestration/events/taxonomy.py +269 -0
  119. hexdag/core/orchestration/hook_context.py +237 -0
  120. hexdag/core/orchestration/hooks.py +437 -0
  121. hexdag/core/orchestration/models.py +418 -0
  122. hexdag/core/orchestration/orchestrator.py +910 -0
  123. hexdag/core/orchestration/orchestrator_factory.py +275 -0
  124. hexdag/core/orchestration/port_wrappers.py +327 -0
  125. hexdag/core/orchestration/prompt/__init__.py +32 -0
  126. hexdag/core/orchestration/prompt/template.py +332 -0
  127. hexdag/core/pipeline_builder/__init__.py +21 -0
  128. hexdag/core/pipeline_builder/component_instantiator.py +386 -0
  129. hexdag/core/pipeline_builder/include_tag.py +265 -0
  130. hexdag/core/pipeline_builder/pipeline_config.py +133 -0
  131. hexdag/core/pipeline_builder/py_tag.py +223 -0
  132. hexdag/core/pipeline_builder/tag_discovery.py +268 -0
  133. hexdag/core/pipeline_builder/yaml_builder.py +1196 -0
  134. hexdag/core/pipeline_builder/yaml_validator.py +569 -0
  135. hexdag/core/ports/__init__.py +65 -0
  136. hexdag/core/ports/api_call.py +133 -0
  137. hexdag/core/ports/database.py +489 -0
  138. hexdag/core/ports/embedding.py +215 -0
  139. hexdag/core/ports/executor.py +237 -0
  140. hexdag/core/ports/file_storage.py +117 -0
  141. hexdag/core/ports/healthcheck.py +87 -0
  142. hexdag/core/ports/llm.py +551 -0
  143. hexdag/core/ports/memory.py +70 -0
  144. hexdag/core/ports/observer_manager.py +130 -0
  145. hexdag/core/ports/secret.py +145 -0
  146. hexdag/core/ports/tool_router.py +94 -0
  147. hexdag/core/ports_builder.py +623 -0
  148. hexdag/core/protocols.py +273 -0
  149. hexdag/core/resolver.py +304 -0
  150. hexdag/core/schema/__init__.py +9 -0
  151. hexdag/core/schema/generator.py +742 -0
  152. hexdag/core/secrets.py +242 -0
  153. hexdag/core/types.py +413 -0
  154. hexdag/core/utils/async_warnings.py +206 -0
  155. hexdag/core/utils/schema_conversion.py +78 -0
  156. hexdag/core/utils/sql_validation.py +86 -0
  157. hexdag/core/validation/secure_json.py +148 -0
  158. hexdag/core/yaml_macro.py +517 -0
  159. hexdag/mcp_server.py +3120 -0
  160. hexdag/studio/__init__.py +10 -0
  161. hexdag/studio/build_ui.py +92 -0
  162. hexdag/studio/server/__init__.py +1 -0
  163. hexdag/studio/server/main.py +100 -0
  164. hexdag/studio/server/routes/__init__.py +9 -0
  165. hexdag/studio/server/routes/execute.py +208 -0
  166. hexdag/studio/server/routes/export.py +558 -0
  167. hexdag/studio/server/routes/files.py +207 -0
  168. hexdag/studio/server/routes/plugins.py +419 -0
  169. hexdag/studio/server/routes/validate.py +220 -0
  170. hexdag/studio/ui/index.html +13 -0
  171. hexdag/studio/ui/package-lock.json +2992 -0
  172. hexdag/studio/ui/package.json +31 -0
  173. hexdag/studio/ui/postcss.config.js +6 -0
  174. hexdag/studio/ui/public/hexdag.svg +5 -0
  175. hexdag/studio/ui/src/App.tsx +251 -0
  176. hexdag/studio/ui/src/components/Canvas.tsx +408 -0
  177. hexdag/studio/ui/src/components/ContextMenu.tsx +187 -0
  178. hexdag/studio/ui/src/components/FileBrowser.tsx +123 -0
  179. hexdag/studio/ui/src/components/Header.tsx +181 -0
  180. hexdag/studio/ui/src/components/HexdagNode.tsx +193 -0
  181. hexdag/studio/ui/src/components/NodeInspector.tsx +512 -0
  182. hexdag/studio/ui/src/components/NodePalette.tsx +262 -0
  183. hexdag/studio/ui/src/components/NodePortsSection.tsx +403 -0
  184. hexdag/studio/ui/src/components/PluginManager.tsx +347 -0
  185. hexdag/studio/ui/src/components/PortsEditor.tsx +481 -0
  186. hexdag/studio/ui/src/components/PythonEditor.tsx +195 -0
  187. hexdag/studio/ui/src/components/ValidationPanel.tsx +105 -0
  188. hexdag/studio/ui/src/components/YamlEditor.tsx +196 -0
  189. hexdag/studio/ui/src/components/index.ts +8 -0
  190. hexdag/studio/ui/src/index.css +92 -0
  191. hexdag/studio/ui/src/main.tsx +10 -0
  192. hexdag/studio/ui/src/types/index.ts +123 -0
  193. hexdag/studio/ui/src/vite-env.d.ts +1 -0
  194. hexdag/studio/ui/tailwind.config.js +29 -0
  195. hexdag/studio/ui/tsconfig.json +37 -0
  196. hexdag/studio/ui/tsconfig.node.json +13 -0
  197. hexdag/studio/ui/vite.config.ts +35 -0
  198. hexdag/visualization/__init__.py +69 -0
  199. hexdag/visualization/dag_visualizer.py +1020 -0
  200. hexdag-0.5.0.dev1.dist-info/METADATA +369 -0
  201. hexdag-0.5.0.dev1.dist-info/RECORD +261 -0
  202. hexdag-0.5.0.dev1.dist-info/WHEEL +4 -0
  203. hexdag-0.5.0.dev1.dist-info/entry_points.txt +4 -0
  204. hexdag-0.5.0.dev1.dist-info/licenses/LICENSE +190 -0
  205. hexdag_plugins/.gitignore +43 -0
  206. hexdag_plugins/README.md +73 -0
  207. hexdag_plugins/__init__.py +1 -0
  208. hexdag_plugins/azure/LICENSE +21 -0
  209. hexdag_plugins/azure/README.md +414 -0
  210. hexdag_plugins/azure/__init__.py +21 -0
  211. hexdag_plugins/azure/azure_blob_adapter.py +450 -0
  212. hexdag_plugins/azure/azure_cosmos_adapter.py +383 -0
  213. hexdag_plugins/azure/azure_keyvault_adapter.py +314 -0
  214. hexdag_plugins/azure/azure_openai_adapter.py +415 -0
  215. hexdag_plugins/azure/pyproject.toml +107 -0
  216. hexdag_plugins/azure/tests/__init__.py +1 -0
  217. hexdag_plugins/azure/tests/test_azure_blob_adapter.py +350 -0
  218. hexdag_plugins/azure/tests/test_azure_cosmos_adapter.py +323 -0
  219. hexdag_plugins/azure/tests/test_azure_keyvault_adapter.py +330 -0
  220. hexdag_plugins/azure/tests/test_azure_openai_adapter.py +329 -0
  221. hexdag_plugins/hexdag_etl/README.md +168 -0
  222. hexdag_plugins/hexdag_etl/__init__.py +53 -0
  223. hexdag_plugins/hexdag_etl/examples/01_simple_pandas_transform.py +270 -0
  224. hexdag_plugins/hexdag_etl/examples/02_simple_pandas_only.py +149 -0
  225. hexdag_plugins/hexdag_etl/examples/03_file_io_pipeline.py +109 -0
  226. hexdag_plugins/hexdag_etl/examples/test_pandas_transform.py +84 -0
  227. hexdag_plugins/hexdag_etl/hexdag.toml +25 -0
  228. hexdag_plugins/hexdag_etl/hexdag_etl/__init__.py +48 -0
  229. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/__init__.py +13 -0
  230. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/api_extract.py +230 -0
  231. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/base_node_factory.py +181 -0
  232. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/file_io.py +415 -0
  233. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/outlook.py +492 -0
  234. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/pandas_transform.py +563 -0
  235. hexdag_plugins/hexdag_etl/hexdag_etl/nodes/sql_extract_load.py +112 -0
  236. hexdag_plugins/hexdag_etl/pyproject.toml +82 -0
  237. hexdag_plugins/hexdag_etl/test_transform.py +54 -0
  238. hexdag_plugins/hexdag_etl/tests/test_plugin_integration.py +62 -0
  239. hexdag_plugins/mysql_adapter/LICENSE +21 -0
  240. hexdag_plugins/mysql_adapter/README.md +224 -0
  241. hexdag_plugins/mysql_adapter/__init__.py +6 -0
  242. hexdag_plugins/mysql_adapter/mysql_adapter.py +408 -0
  243. hexdag_plugins/mysql_adapter/pyproject.toml +93 -0
  244. hexdag_plugins/mysql_adapter/tests/test_mysql_adapter.py +259 -0
  245. hexdag_plugins/storage/README.md +184 -0
  246. hexdag_plugins/storage/__init__.py +19 -0
  247. hexdag_plugins/storage/file/__init__.py +5 -0
  248. hexdag_plugins/storage/file/local.py +325 -0
  249. hexdag_plugins/storage/ports/__init__.py +5 -0
  250. hexdag_plugins/storage/ports/vector_store.py +236 -0
  251. hexdag_plugins/storage/sql/__init__.py +7 -0
  252. hexdag_plugins/storage/sql/base.py +187 -0
  253. hexdag_plugins/storage/sql/mysql.py +27 -0
  254. hexdag_plugins/storage/sql/postgresql.py +27 -0
  255. hexdag_plugins/storage/tests/__init__.py +1 -0
  256. hexdag_plugins/storage/tests/test_local_file_storage.py +161 -0
  257. hexdag_plugins/storage/tests/test_sql_adapters.py +212 -0
  258. hexdag_plugins/storage/vector/__init__.py +7 -0
  259. hexdag_plugins/storage/vector/chromadb.py +223 -0
  260. hexdag_plugins/storage/vector/in_memory.py +285 -0
  261. hexdag_plugins/storage/vector/pgvector.py +502 -0
@@ -0,0 +1,742 @@
1
+ """Schema generator - converts Python signatures to JSON Schema."""
2
+
3
+ import inspect
4
+ import json
5
+ import re
6
+ from collections.abc import Callable
7
+ from typing import Any, get_args, get_origin
8
+
9
+ import yaml
10
+
11
+ from hexdag.core.logging import get_logger
12
+ from hexdag.core.types import (
13
+ get_annotated_metadata,
14
+ is_annotated_type,
15
+ is_dict_type,
16
+ is_list_type,
17
+ is_literal_type,
18
+ is_union_type,
19
+ )
20
+
21
+ logger = get_logger(__name__)
22
+
23
+
24
+ class SchemaGenerator:
25
+ """Generate JSON Schema from Python callables.
26
+
27
+ This class introspects Python functions/methods to automatically generate
28
+ JSON Schema definitions. It supports:
29
+ - Basic types (str, int, float, bool)
30
+ - Literal types → enum
31
+ - Union types → anyOf
32
+ - List/Dict types → array/object
33
+ - Annotated types with Pydantic Field constraints
34
+ - Docstring extraction for descriptions
35
+
36
+ Examples
37
+ --------
38
+ >>> def my_func(name: str, count: int = 10):
39
+ ... '''Example function.'''
40
+ ... pass
41
+ >>> schema = SchemaGenerator.from_callable(my_func)
42
+ >>> schema['properties']['count']['default']
43
+ 10
44
+ """
45
+
46
+ # Basic type mapping from Python to JSON Schema
47
+ BASIC_TYPE_MAP = {
48
+ str: {"type": "string"},
49
+ int: {"type": "integer"},
50
+ float: {"type": "number"},
51
+ bool: {"type": "boolean"},
52
+ dict: {"type": "object"},
53
+ list: {"type": "array"},
54
+ None: {"type": "null"},
55
+ type(None): {"type": "null"},
56
+ }
57
+
58
@staticmethod
def from_callable(factory: Callable, format: str = "dict") -> dict | str:
    """Generate a JSON Schema describing the parameters of ``factory``.

    Resolution order:
    1. A non-empty dict stored in the ``_yaml_schema`` attribute of ``factory``
    2. Otherwise, the signature reported by ``inspect.signature(factory)``

    This allows builder-pattern classes like ConditionalNode to define
    explicit schemas for YAML/MCP usage.

    When introspecting, the following parameters are left out of the schema:
    anything named ``self``, ``cls``, ``args`` or ``kwargs``; any ``*args`` /
    ``**kwargs`` parameter regardless of name; and a parameter called ``name``
    when it is the first parameter after ``self``/``cls`` (node factory
    convention). Parameters without an annotation are treated as ``str``.

    Args
    ----
    factory: Callable (function, method, or callable class) to introspect
    format: Output format - "dict", "yaml", or "json"

    Returns
    -------
    dict | str: Whatever ``SchemaGenerator._format_output`` produces for the
        schema in the requested format. If the signature cannot be obtained,
        an empty schema is formatted and returned instead.

    Raises
    ------
    ValueError
        If format is not one of: dict, yaml, json

    Examples
    --------
    >>> def factory(name: str, count: int, enabled: bool = True):
    ...     pass
    >>> schema = SchemaGenerator.from_callable(factory)
    >>> schema['properties']['enabled']['default']
    True
    >>> schema['required']
    ['count']

    >>> # Classes can define explicit schemas
    >>> class MyNode:
    ...     _yaml_schema = {"type": "object", "properties": {"foo": {"type": "string"}}}
    >>> schema = SchemaGenerator.from_callable(MyNode)
    >>> "foo" in schema.get("properties", {})
    True
    """
    if format not in ("dict", "yaml", "json"):
        raise ValueError(f"Invalid format: {format}. Must be one of: dict, yaml, json")

    # Check for explicit _yaml_schema class attribute (for builder-pattern nodes)
    yaml_schema = getattr(factory, "_yaml_schema", None)
    if yaml_schema and isinstance(yaml_schema, dict):
        logger.debug(f"Using explicit _yaml_schema for {factory}")
        return SchemaGenerator._format_output(yaml_schema, format)

    try:
        sig = inspect.signature(factory)
    except (ValueError, TypeError) as e:
        logger.warning(f"Could not get signature for {factory}: {e}")
        return SchemaGenerator._format_output({}, format)

    # Extract param docs - try the __call__ attribute first (for callable
    # instances), then fall back to the factory's own docstring.
    param_docs: dict[str, str] = {}
    call_method = getattr(factory, "__call__", None)  # noqa: B004
    if call_method is not None:
        param_docs = SchemaGenerator._extract_param_docs(call_method)
    if not param_docs:
        param_docs = SchemaGenerator._extract_param_docs(factory)

    # First parameter that is not self/cls; used for the 'name' convention below.
    first_non_self_param = next(
        (pname for pname in sig.parameters if pname not in ("self", "cls")),
        None,
    )

    properties: dict[str, Any] = {}
    required: list[str] = []

    for param_name, param in sig.parameters.items():
        # Skip special parameters
        if param_name in ("self", "cls", "args", "kwargs"):
            continue

        # Skip 'name' if it's the first parameter (node factory convention)
        if param_name == "name" and param_name == first_non_self_param:
            continue

        # Skip *args and **kwargs
        if param.kind in (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        ):
            continue

        # Compare against the sentinel by identity: ``==`` would invoke the
        # annotation's / default's own comparison operators.
        param_type = param.annotation
        if param_type is inspect.Parameter.empty:
            # No annotation: default to string type
            param_type = str

        prop_schema = SchemaGenerator._type_to_json_schema(param_type)

        if param_name in param_docs:
            doc_text = param_docs[param_name]
            prop_schema["description"] = doc_text

            # For list[dict] types, try to extract nested structure from docstring
            if (
                prop_schema.get("type") == "array"
                and prop_schema.get("items", {}).get("type") == "object"
            ):
                nested_schema = SchemaGenerator._extract_nested_structure(doc_text)
                if nested_schema and "properties" in nested_schema:
                    prop_schema["items"] = nested_schema

        if param.default is not inspect.Parameter.empty:
            prop_schema["default"] = param.default
        else:
            # Required if no default
            required.append(param_name)

        properties[param_name] = prop_schema

    schema: dict[str, Any] = {
        "type": "object",
        "properties": properties,
        "additionalProperties": False,
    }

    # "required" is only emitted when at least one parameter has no default.
    if required:
        schema["required"] = required

    return SchemaGenerator._format_output(schema, format)
192
+
193
+ @staticmethod
194
+ def _is_callable_type(type_hint: Any) -> bool:
195
+ """Check if type hint is a Callable type.
196
+
197
+ Examples
198
+ --------
199
+ >>> from collections.abc import Callable
200
+ >>> SchemaGenerator._is_callable_type(Callable)
201
+ True
202
+ >>> SchemaGenerator._is_callable_type(Callable[..., Any])
203
+ True
204
+ >>> SchemaGenerator._is_callable_type(str)
205
+ False
206
+ """
207
+ # Check for collections.abc.Callable
208
+ if type_hint is Callable:
209
+ return True
210
+
211
+ # Check for typing.Callable or parameterized Callable[..., ...]
212
+ origin = get_origin(type_hint)
213
+ if origin is Callable:
214
+ return True
215
+
216
+ # Check for callable origin from collections.abc
217
+ try:
218
+ from collections.abc import Callable as ABCCallable
219
+
220
+ if origin is ABCCallable:
221
+ return True
222
+ except ImportError:
223
+ pass
224
+
225
+ return False
226
+
227
@staticmethod
def _type_to_json_schema(type_hint: Any) -> dict:
    """Convert Python type hint to JSON Schema type.

    Handles, in this order:
    - Callable types: Callable[..., Any] → string (module path)
    - Annotated types: Annotated[int, Field(ge=0)] → min/max, length, description
    - Literal types: Literal["a", "b"] → enum
    - Union types: str | int → anyOf; a ``None`` member makes the result nullable
    - List types: list[str] → array
    - Dict types: dict[str, Any] → object
    - Basic types: the keys of ``BASIC_TYPE_MAP``
    - Type aliases: objects exposing ``__value__`` are resolved recursively

    Anything else falls back to ``{"type": "string"}``.

    Nullability is always reflected in the result: ``T | None`` adds
    ``"null"`` to ``type`` (and ``None`` to ``enum`` when one is present),
    and ``A | B | None`` adds a ``{"type": "null"}`` alternative to ``anyOf``.
    A Literal mixing several value types gets a list of types.

    Args
    ----
    type_hint: Python type annotation

    Returns
    -------
    dict: JSON Schema definition for the type

    Examples
    --------
    >>> from typing import Literal
    >>> SchemaGenerator._type_to_json_schema(Literal["a", "b"])
    {'type': 'string', 'enum': ['a', 'b']}
    """

    def allow_null(target: dict) -> dict:
        """Extend ``target`` in place so that it also accepts null."""
        # An enum restricts values independently of "type", so null has to be
        # listed there as well or it can never validate.
        if "enum" in target and None not in target["enum"]:
            target["enum"] = [*target["enum"], None]
        if "type" in target:
            current = target["type"]
            if isinstance(current, list):
                if "null" not in current:
                    target["type"] = [*current, "null"]
            elif current != "null":
                target["type"] = [current, "null"]
        elif "anyOf" in target and {"type": "null"} not in target["anyOf"]:
            target["anyOf"] = [*target["anyOf"], {"type": "null"}]
        return target

    # Handle Callable types first (before checking other patterns)
    # Callables are represented as strings (module paths) in YAML
    if SchemaGenerator._is_callable_type(type_hint):
        return {
            "type": "string",
            "description": "Module path string (e.g., 'myapp.process') or !py inline function",
        }

    if is_annotated_type(type_hint):
        base_type, metadata = get_annotated_metadata(type_hint)

        # Recursively process base type
        schema = SchemaGenerator._type_to_json_schema(base_type)

        # Attribute on the constraint object → JSON Schema keyword
        constraint_keywords = (
            ("ge", "minimum"),
            ("le", "maximum"),
            ("gt", "exclusiveMinimum"),
            ("lt", "exclusiveMaximum"),
            ("min_length", "minLength"),
            ("max_length", "maxLength"),
        )

        # In Pydantic v2, Field stores metadata as FieldInfo with .metadata attribute
        for constraint in metadata:
            for meta_item in getattr(constraint, "metadata", None) or ():
                for attr_name, keyword in constraint_keywords:
                    if hasattr(meta_item, attr_name):
                        schema[keyword] = getattr(meta_item, attr_name)

            # Also check for description on the Field itself
            if getattr(constraint, "description", None):
                schema["description"] = constraint.description

        return schema

    if is_literal_type(type_hint):
        args = get_args(type_hint)

        # Collect the JSON type of every literal value, keeping first-seen
        # order; values of unmapped Python types are described as strings.
        json_types: list[str] = []
        for value in args:
            mapped = SchemaGenerator.BASIC_TYPE_MAP.get(type(value))
            json_type = mapped["type"] if mapped else "string"
            if json_type not in json_types:
                json_types.append(json_type)
        if not json_types:
            json_types = ["string"]

        literal_schema: dict[str, Any] = {
            "type": json_types[0] if len(json_types) == 1 else json_types,
            "enum": list(args),
        }
        return literal_schema

    if is_union_type(type_hint):
        args = get_args(type_hint)

        # Filter out None for Optional types, but remember that it was there
        non_none_args = [arg for arg in args if arg is not type(None)]
        nullable = len(non_none_args) != len(args)

        if len(non_none_args) == 1:
            # Optional[T] → make nullable
            return allow_null(SchemaGenerator._type_to_json_schema(non_none_args[0]))

        # Multiple types → anyOf, deduplicating members that map to the same
        # schema (e.g. several Callable variants).
        unique_schemas: list[dict] = []
        seen: set[str] = set()
        for arg in non_none_args:
            candidate = SchemaGenerator._type_to_json_schema(arg)
            # Hashable representation; ``default=repr`` keeps the key
            # computable when enum values are not JSON-serialisable.
            schema_repr = json.dumps(candidate, sort_keys=True, default=repr)
            if schema_repr not in seen:
                seen.add(schema_repr)
                unique_schemas.append(candidate)

        if len(unique_schemas) == 1:
            only = unique_schemas[0]
            return allow_null(only) if nullable else only

        if nullable:
            unique_schemas.append({"type": "null"})

        return {"anyOf": unique_schemas}

    if is_list_type(type_hint):
        args = get_args(type_hint)
        item_type = args[0] if args else Any

        return {
            "type": "array",
            "items": SchemaGenerator._type_to_json_schema(item_type),
        }

    if is_dict_type(type_hint):
        args = get_args(type_hint)
        # If dict has typed values, try to extract schema
        if len(args) >= 2:
            value_type = args[1]
            if value_type is not Any:
                return {
                    "type": "object",
                    "additionalProperties": SchemaGenerator._type_to_json_schema(value_type),
                }
        return {"type": "object"}

    if type_hint in SchemaGenerator.BASIC_TYPE_MAP:
        # Copy so callers can add keys without touching the shared mapping
        return SchemaGenerator.BASIC_TYPE_MAP[type_hint].copy()

    # Check if it's a type alias that we can resolve
    # Type aliases like `Mode = Literal[...]` should be resolved
    if hasattr(type_hint, "__value__"):
        # Python 3.12+ type aliases have __value__
        return SchemaGenerator._type_to_json_schema(type_hint.__value__)

    # Default to string for unknown types
    return {"type": "string"}
378
+
379
+ @staticmethod
380
+ def _extract_nested_structure(description: str) -> dict[str, Any] | None:
381
+ """Extract nested object structure from parameter description.
382
+
383
+ Parses docstring descriptions that define object structures with fields:
384
+ - "Each branch has: - condition: str - Expression..."
385
+ - "Dict with keys: field1: type - description..."
386
+
387
+ Args
388
+ ----
389
+ description: Parameter description text
390
+
391
+ Returns
392
+ -------
393
+ dict[str, Any] | None: JSON Schema properties dict if structure found, None otherwise
394
+
395
+ Examples
396
+ --------
397
+ >>> desc = "List of branches. Each has: - condition: str - The condition"
398
+ >>> result = SchemaGenerator._extract_nested_structure(desc)
399
+ >>> "condition" in result.get("properties", {}) if result else False
400
+ True
401
+ """
402
+ if not description:
403
+ return None
404
+
405
+ properties: dict[str, Any] = {}
406
+
407
+ # Pattern 1: "- field: type - description" (bullet list style)
408
+ # Matches lines like "- condition: str - Expression to evaluate"
409
+ bullet_pattern = re.compile(
410
+ r"-\s+(\w+):\s*(\w+)?\s*[-–—]?\s*(.*?)(?=\n\s*-|\Z)",
411
+ re.MULTILINE | re.DOTALL,
412
+ )
413
+ matches = bullet_pattern.findall(description)
414
+
415
+ for match in matches:
416
+ field_name = match[0].strip()
417
+ field_type = match[1].strip() if match[1] else "string"
418
+ field_desc = match[2].strip() if len(match) > 2 else ""
419
+
420
+ # Map common type names to JSON Schema types
421
+ type_map = {
422
+ "str": "string",
423
+ "string": "string",
424
+ "int": "integer",
425
+ "integer": "integer",
426
+ "float": "number",
427
+ "number": "number",
428
+ "bool": "boolean",
429
+ "boolean": "boolean",
430
+ "list": "array",
431
+ "array": "array",
432
+ "dict": "object",
433
+ "object": "object",
434
+ }
435
+
436
+ json_type = type_map.get(field_type.lower(), "string")
437
+ prop_schema: dict[str, Any] = {"type": json_type}
438
+ if field_desc:
439
+ prop_schema["description"] = field_desc
440
+
441
+ properties[field_name] = prop_schema
442
+
443
+ if properties:
444
+ return {"type": "object", "properties": properties}
445
+
446
+ return None
447
+
448
+ @staticmethod
449
+ def _extract_param_docs(func: Callable) -> dict[str, str]:
450
+ """Extract parameter descriptions from function docstring.
451
+
452
+ Supports multiple docstring formats:
453
+ - Google style (Args:)
454
+ - NumPy style (Parameters with --- separator)
455
+ - Sphinx style (:param name:)
456
+
457
+ Args
458
+ ----
459
+ func: Function to extract docs from
460
+
461
+ Returns
462
+ -------
463
+ dict[str, str]: Mapping of parameter name to description
464
+
465
+ Examples
466
+ --------
467
+ >>> def func(name: str, count: int):
468
+ ... '''Function description.
469
+ ...
470
+ ... Args:
471
+ ... name: The name parameter
472
+ ... count: The count parameter
473
+ ... '''
474
+ ... pass
475
+ >>> docs = SchemaGenerator._extract_param_docs(func)
476
+ >>> docs['name']
477
+ 'The name parameter'
478
+ """
479
+ docstring = inspect.getdoc(func)
480
+ if not docstring:
481
+ return {}
482
+
483
+ param_docs: dict[str, str] = {}
484
+ lines = docstring.split("\n")
485
+
486
+ # Look for Args/Parameters section
487
+ in_params_section = False
488
+ current_param: str | None = None
489
+ is_numpy_style = False
490
+
491
+ for i, line in enumerate(lines):
492
+ line_stripped = line.strip()
493
+
494
+ # Check for NumPy-style separator (line of dashes after Parameters header)
495
+ # Also check if this is a short separator (4 chars like "----") which may be
496
+ # a formatting artifact rather than a true NumPy-style separator
497
+ if (
498
+ in_params_section
499
+ and line_stripped
500
+ and len(line_stripped) >= 3
501
+ and all(c in ("-", "=", "_") for c in line_stripped)
502
+ ):
503
+ # Only treat as NumPy style if the separator is long enough (>=10 chars)
504
+ # Short separators like "----" are often just formatting
505
+ if len(line_stripped) >= 10:
506
+ is_numpy_style = True
507
+ continue
508
+
509
+ # Detect start of parameters section (case insensitive)
510
+ if line_stripped.lower() in (
511
+ "args:",
512
+ "arguments:",
513
+ "parameters:",
514
+ "params:",
515
+ "args",
516
+ "arguments",
517
+ "parameters",
518
+ "params",
519
+ ):
520
+ in_params_section = True
521
+ continue
522
+
523
+ # Exit parameters section when we hit another section header
524
+ if in_params_section:
525
+ # NumPy style: section headers are followed by separator lines
526
+ # Check if next line is a separator (indicating new section)
527
+ if i + 1 < len(lines):
528
+ next_line = lines[i + 1].strip()
529
+ if (
530
+ line_stripped
531
+ and not line.startswith((" ", "\t"))
532
+ and next_line
533
+ and len(next_line) >= 3
534
+ and all(c in ("-", "=", "_") for c in next_line)
535
+ ):
536
+ break
537
+
538
+ # Google style: section headers end with :
539
+ if (
540
+ line_stripped
541
+ and line_stripped.endswith(":")
542
+ and not line.startswith(" ")
543
+ and line_stripped.lower() not in ("args:", "parameters:", "params:")
544
+ ):
545
+ break
546
+
547
+ # Check for section keywords
548
+ if (
549
+ line
550
+ and not line.startswith((" ", "\t"))
551
+ and line_stripped
552
+ and any(
553
+ keyword in line_stripped.lower()
554
+ for keyword in [
555
+ "returns",
556
+ "raises",
557
+ "yields",
558
+ "examples",
559
+ "notes",
560
+ "see also",
561
+ ]
562
+ )
563
+ ):
564
+ break
565
+
566
+ # Process parameter lines
567
+ if in_params_section:
568
+ # Sphinx style: ":param param_name: description"
569
+ if line_stripped.startswith(":param"):
570
+ parts = line_stripped.split(":", 3)
571
+ if len(parts) >= 3:
572
+ param_name = parts[1].replace("param", "").strip()
573
+ description = parts[2].strip()
574
+ param_docs[param_name] = description
575
+ current_param = param_name
576
+ # NumPy style: "param_name : type" on one line, description indented below
577
+ elif is_numpy_style and " : " in line_stripped and not line.startswith((" ", "\t")):
578
+ parts = line_stripped.split(" : ", 1)
579
+ param_name = parts[0].strip()
580
+ # Skip type info, description comes on next indented lines
581
+ if param_name and not any(
582
+ keyword in param_name.lower()
583
+ for keyword in ["return", "raise", "yield", "example", "note"]
584
+ ):
585
+ param_docs[param_name] = ""
586
+ current_param = param_name
587
+ # NumPy style continuation: indented lines are descriptions
588
+ elif (
589
+ is_numpy_style
590
+ and current_param
591
+ and line.startswith((" ", "\t"))
592
+ and line_stripped
593
+ ):
594
+ if param_docs[current_param]:
595
+ param_docs[current_param] += " " + line_stripped
596
+ else:
597
+ param_docs[current_param] = line_stripped
598
+ # Google style: "param_name: description" or "param_name (type): description"
599
+ # Only applies when NOT in NumPy mode (to avoid conflicts)
600
+ elif not is_numpy_style and ":" in line_stripped:
601
+ parts = line_stripped.split(":", 1)
602
+ if len(parts) == 2:
603
+ param_part = parts[0].strip()
604
+ description = parts[1].strip()
605
+ # Handle "param_name (type)" format
606
+ if "(" in param_part:
607
+ param_name = param_part.split("(")[0].strip()
608
+ else:
609
+ param_name = param_part
610
+ # Make sure it's not a section header
611
+ if param_name and not any(
612
+ keyword in param_name.lower()
613
+ for keyword in ["return", "raise", "yield", "example", "note"]
614
+ ):
615
+ param_docs[param_name] = description
616
+ current_param = param_name
617
+ # Google style continuation lines (indented description for current param)
618
+ elif (
619
+ not is_numpy_style
620
+ and current_param
621
+ and line.startswith((" ", "\t"))
622
+ and line_stripped
623
+ ):
624
+ if param_docs[current_param]:
625
+ param_docs[current_param] += " " + line_stripped
626
+ else:
627
+ param_docs[current_param] = line_stripped
628
+
629
+ return param_docs
630
+
631
+ @staticmethod
632
+ def _format_output(schema: dict, format: str) -> dict | str:
633
+ """Format schema output as dict, YAML, or JSON.
634
+
635
+ Args
636
+ ----
637
+ schema: JSON Schema dict
638
+ format: Output format - "dict", "yaml", or "json"
639
+
640
+ Returns
641
+ -------
642
+ dict | str: Schema in requested format
643
+ """
644
+ if format == "dict":
645
+ return schema
646
+ if format == "yaml":
647
+ yaml_str: str = yaml.dump(schema, sort_keys=False, default_flow_style=False)
648
+ return yaml_str
649
+ if format == "json":
650
+ return json.dumps(schema, indent=2)
651
+ return schema
652
+
653
@staticmethod
def generate_example_yaml(node_type: str, schema: dict) -> str:
    """Generate example YAML from schema.

    Creates a complete YAML example with:
    - K8s-style structure (kind, metadata, spec)
    - Default values where available
    - The first listed example when a property has no default
    - Type-based placeholders for required fields that have neither
    - Optional fields without a default or example are omitted

    Args
    ----
        node_type: Node type name (e.g., "llm_node")
        schema: JSON Schema dict

    Returns
    -------
        str: Example YAML string

    Examples
    --------
        >>> schema = {
        ...     "properties": {
        ...         "template": {"type": "string"},
        ...         "model": {"type": "string", "default": "gpt-4"}
        ...     },
        ...     "required": ["template"]
        ... }
        >>> example = SchemaGenerator.generate_example_yaml("llm_node", schema)
        >>> "kind: llm_node" in example
        True
    """
    properties = schema.get("properties", {})
    required = schema.get("required", [])

    spec: dict[str, Any] = {}
    for prop_name, prop_schema in properties.items():
        if "default" in prop_schema:
            # An explicit default always wins, even a falsy one such as 0 or "".
            spec[prop_name] = prop_schema["default"]
        elif prop_schema.get("examples"):
            # Only index into a non-empty list: an empty "examples" array is
            # valid JSON Schema and must not raise IndexError here.
            spec[prop_name] = prop_schema["examples"][0]
        elif prop_name in required:
            # Required field with nothing better to show - use a typed placeholder.
            spec[prop_name] = SchemaGenerator._placeholder_for_type(prop_schema.get("type"))
        # Optional fields without defaults or examples are omitted.

    example: dict[str, Any] = {
        "kind": node_type,
        "metadata": {"name": f"my_{node_type}"},
        "spec": spec,
    }

    yaml_output: str = yaml.dump(example, sort_keys=False, default_flow_style=False)
    return yaml_output
709
+
710
+ @staticmethod
711
+ def _placeholder_for_type(json_type: str | list | None) -> Any:
712
+ """Get placeholder value for a JSON Schema type.
713
+
714
+ Args
715
+ ----
716
+ json_type: JSON Schema type string or list
717
+
718
+ Returns
719
+ -------
720
+ Any: Appropriate placeholder value
721
+
722
+ Examples
723
+ --------
724
+ >>> SchemaGenerator._placeholder_for_type("string")
725
+ 'value'
726
+ >>> SchemaGenerator._placeholder_for_type("integer")
727
+ 0
728
+ """
729
+ if isinstance(json_type, list):
730
+ json_type = json_type[0] if json_type else "string"
731
+
732
+ placeholders = {
733
+ "string": "value",
734
+ "integer": 0,
735
+ "number": 0.0,
736
+ "boolean": False,
737
+ "array": [],
738
+ "object": {},
739
+ "null": None,
740
+ }
741
+
742
+ return placeholders.get(json_type if isinstance(json_type, str) else "string", "value")