gllm-pipeline-binary 0.4.21__cp311-cp311-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. gllm_pipeline/__init__.pyi +0 -0
  2. gllm_pipeline/alias.pyi +7 -0
  3. gllm_pipeline/exclusions/__init__.pyi +4 -0
  4. gllm_pipeline/exclusions/exclusion_manager.pyi +74 -0
  5. gllm_pipeline/exclusions/exclusion_set.pyi +46 -0
  6. gllm_pipeline/pipeline/__init__.pyi +4 -0
  7. gllm_pipeline/pipeline/composer/__init__.pyi +8 -0
  8. gllm_pipeline/pipeline/composer/composer.pyi +350 -0
  9. gllm_pipeline/pipeline/composer/guard_composer.pyi +58 -0
  10. gllm_pipeline/pipeline/composer/if_else_composer.pyi +57 -0
  11. gllm_pipeline/pipeline/composer/parallel_composer.pyi +47 -0
  12. gllm_pipeline/pipeline/composer/switch_composer.pyi +57 -0
  13. gllm_pipeline/pipeline/composer/toggle_composer.pyi +48 -0
  14. gllm_pipeline/pipeline/pipeline.pyi +280 -0
  15. gllm_pipeline/pipeline/states.pyi +139 -0
  16. gllm_pipeline/router/__init__.pyi +6 -0
  17. gllm_pipeline/router/aurelio_semantic_router/__init__.pyi +3 -0
  18. gllm_pipeline/router/aurelio_semantic_router/aurelio_semantic_router.pyi +86 -0
  19. gllm_pipeline/router/aurelio_semantic_router/bytes_compat_route.pyi +40 -0
  20. gllm_pipeline/router/aurelio_semantic_router/encoders/__init__.pyi +5 -0
  21. gllm_pipeline/router/aurelio_semantic_router/encoders/em_invoker_encoder.pyi +46 -0
  22. gllm_pipeline/router/aurelio_semantic_router/encoders/langchain_encoder.pyi +50 -0
  23. gllm_pipeline/router/aurelio_semantic_router/encoders/tei_encoder.pyi +49 -0
  24. gllm_pipeline/router/aurelio_semantic_router/index/__init__.pyi +4 -0
  25. gllm_pipeline/router/aurelio_semantic_router/index/aurelio_index.pyi +65 -0
  26. gllm_pipeline/router/aurelio_semantic_router/index/azure_ai_search_aurelio_index.pyi +71 -0
  27. gllm_pipeline/router/aurelio_semantic_router/index/vector_store_adapter_index.pyi +119 -0
  28. gllm_pipeline/router/lm_based_router.pyi +60 -0
  29. gllm_pipeline/router/preset/__init__.pyi +0 -0
  30. gllm_pipeline/router/preset/aurelio/__init__.pyi +0 -0
  31. gllm_pipeline/router/preset/aurelio/router_image_domain_specific.pyi +21 -0
  32. gllm_pipeline/router/preset/lm_based/__init__.pyi +0 -0
  33. gllm_pipeline/router/preset/lm_based/router_image_domain_specific.pyi +14 -0
  34. gllm_pipeline/router/preset/preset_loader.pyi +24 -0
  35. gllm_pipeline/router/router.pyi +46 -0
  36. gllm_pipeline/router/rule_based_router.pyi +80 -0
  37. gllm_pipeline/router/similarity_based_router.pyi +72 -0
  38. gllm_pipeline/router/utils.pyi +26 -0
  39. gllm_pipeline/steps/__init__.pyi +17 -0
  40. gllm_pipeline/steps/_func.pyi +958 -0
  41. gllm_pipeline/steps/branching_step.pyi +24 -0
  42. gllm_pipeline/steps/component_step.pyi +82 -0
  43. gllm_pipeline/steps/composite_step.pyi +65 -0
  44. gllm_pipeline/steps/conditional_step.pyi +161 -0
  45. gllm_pipeline/steps/guard_step.pyi +71 -0
  46. gllm_pipeline/steps/log_step.pyi +53 -0
  47. gllm_pipeline/steps/map_reduce_step.pyi +92 -0
  48. gllm_pipeline/steps/no_op_step.pyi +40 -0
  49. gllm_pipeline/steps/parallel_step.pyi +128 -0
  50. gllm_pipeline/steps/pipeline_step.pyi +231 -0
  51. gllm_pipeline/steps/state_operator_step.pyi +75 -0
  52. gllm_pipeline/steps/step_error_handler/__init__.pyi +6 -0
  53. gllm_pipeline/steps/step_error_handler/empty_step_error_handler.pyi +20 -0
  54. gllm_pipeline/steps/step_error_handler/fallback_step_error_handler.pyi +24 -0
  55. gllm_pipeline/steps/step_error_handler/keep_step_error_handler.pyi +9 -0
  56. gllm_pipeline/steps/step_error_handler/raise_step_error_handler.pyi +9 -0
  57. gllm_pipeline/steps/step_error_handler/step_error_handler.pyi +46 -0
  58. gllm_pipeline/steps/subgraph_step.pyi +90 -0
  59. gllm_pipeline/steps/terminator_step.pyi +57 -0
  60. gllm_pipeline/types.pyi +10 -0
  61. gllm_pipeline/utils/__init__.pyi +9 -0
  62. gllm_pipeline/utils/async_utils.pyi +21 -0
  63. gllm_pipeline/utils/copy.pyi +11 -0
  64. gllm_pipeline/utils/error_handling.pyi +61 -0
  65. gllm_pipeline/utils/graph.pyi +16 -0
  66. gllm_pipeline/utils/has_inputs_mixin.pyi +50 -0
  67. gllm_pipeline/utils/input_map.pyi +12 -0
  68. gllm_pipeline/utils/mermaid.pyi +29 -0
  69. gllm_pipeline/utils/retry_converter.pyi +25 -0
  70. gllm_pipeline/utils/step_execution.pyi +19 -0
  71. gllm_pipeline.build/.gitignore +1 -0
  72. gllm_pipeline.cpython-311-darwin.so +0 -0
  73. gllm_pipeline.pyi +86 -0
  74. gllm_pipeline_binary-0.4.21.dist-info/METADATA +105 -0
  75. gllm_pipeline_binary-0.4.21.dist-info/RECORD +77 -0
  76. gllm_pipeline_binary-0.4.21.dist-info/WHEEL +5 -0
  77. gllm_pipeline_binary-0.4.21.dist-info/top_level.txt +1 -0
@@ -0,0 +1,958 @@
1
+ from gllm_core.schema import Component
2
+ from gllm_core.utils.retry import RetryConfig as RetryConfig
3
+ from gllm_datastore.cache.cache import BaseCache as BaseCache
4
+ from gllm_pipeline.alias import InputMapSpec as InputMapSpec, PipelineSteps as PipelineSteps
5
+ from gllm_pipeline.pipeline.pipeline import Pipeline as Pipeline
6
+ from gllm_pipeline.steps.component_step import ComponentStep as ComponentStep
7
+ from gllm_pipeline.steps.conditional_step import ConditionType as ConditionType, ConditionalStep as ConditionalStep, DEFAULT_BRANCH as DEFAULT_BRANCH
8
+ from gllm_pipeline.steps.guard_step import GuardStep as GuardStep
9
+ from gllm_pipeline.steps.log_step import LogStep as LogStep
10
+ from gllm_pipeline.steps.map_reduce_step import MapReduceStep as MapReduceStep
11
+ from gllm_pipeline.steps.no_op_step import NoOpStep as NoOpStep
12
+ from gllm_pipeline.steps.parallel_step import ParallelStep as ParallelStep
13
+ from gllm_pipeline.steps.pipeline_step import BasePipelineStep as BasePipelineStep
14
+ from gllm_pipeline.steps.state_operator_step import StateOperatorStep as StateOperatorStep
15
+ from gllm_pipeline.steps.step_error_handler.step_error_handler import BaseStepErrorHandler as BaseStepErrorHandler
16
+ from gllm_pipeline.steps.subgraph_step import SubgraphStep as SubgraphStep
17
+ from gllm_pipeline.steps.terminator_step import TerminatorStep as TerminatorStep
18
+ from typing import Any, Callable
19
+
20
+ def step(component: Component, input_state_map: dict[str, str] | None = None, output_state: str | list[str] | None = None, runtime_config_map: dict[str, str] | None = None, fixed_args: dict[str, Any] | None = None, input_map: InputMapSpec | None = None, emittable: bool = True, retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, name: str | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None) -> ComponentStep:
21
+ '''Create a ComponentStep with a concise syntax.
22
+
23
+ This function creates a ComponentStep, which wraps a component and manages its inputs and outputs within the
24
+ pipeline.
25
+
26
+ Usage example:
27
+ We can leverage the `input_map` parameter to specify both state/config keys (as strings)
28
+ and fixed values (as any type) in a single dictionary.
29
+ ```python
30
+ retriever = Retriever()
31
+ retriever_step = step(
32
+ retriever,
33
+ input_map={
34
+ "query": "user_input",
35
+ "top_k": "config_top_k",
36
+ "conversation_id": "Val(<fixed_value>)",
37
+ },
38
+ output_state="retrieved_data",
39
+ )
40
+ ```
41
+ This will cause the step to execute the Retriever component with the following behavior:
42
+ 1. It will pass the `user_input` from the pipeline state to the `query` argument of the Retriever.
43
+ 2. It will pass the `config_top_k` from the runtime configuration to the `top_k` argument of the Retriever.
44
+ 3. It will pass the fixed value `<fixed_value>` to the `conversation_id` argument of the Retriever.
45
+ 4. It will store the `retrieved_data` from the Retriever result in the pipeline state.
46
+
47
+ Legacy Approach (will be deprecated in v0.5, please use `input_map` instead):
48
+ ```python
49
+ retriever = Retriever()
50
+ retriever_step = step(retriever, {"query": "input_query"}, "retrieved_data")
51
+ ```
52
+ This will cause the step to execute the Retriever component with the following behavior:
53
+ 1. It will pass the `input_query` from the pipeline state to the `query` argument of the Retriever.
54
+ 2. It will store the `retrieved_data` from the Retriever result in the pipeline state.
55
+
56
+
57
+ Args:
58
+ component (Component): The component to be executed in this step.
59
+ input_state_map (dict[str, str] | None): Mapping of component input arguments to pipeline state keys.
60
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
61
+ output_state (str | list[str] | None, optional): Key(s) to extract from the component result and add to the
62
+ pipeline state. If None, the component is executed but no state updates are performed. Defaults to None.
63
+ runtime_config_map (dict[str, str] | None, optional): Mapping of component arguments to runtime
64
+ configuration keys. Defaults to None, in which case an empty dictionary is used.
65
+ Will be deprecated in v0.5. Please use input_map instead.
66
+ fixed_args (dict[str, Any] | None, optional): Fixed arguments to be passed to the component. Defaults to None,
67
+ in which case an empty dictionary is used. Will be deprecated in v0.5. Please use input_map instead.
68
+ input_map (InputMapSpec, optional): Direct unified input map. If provided,
69
+ input_state_map, runtime_config_map, and fixed_args will be ignored;
70
+ otherwise it will be synthesized from the input_state_map, runtime_config_map, and fixed_args.
71
+ Defaults to None.
72
+ emittable (bool, optional): Whether an event emitter should be passed to the component, if available in the
73
+ state and not explicitly provided in any of the arguments. Defaults to True.
74
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
75
+ Defaults to None, in which case no retry config is applied.
76
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
77
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
78
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
79
+ Defaults to None, in which case no cache store is used.
80
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
81
+ Defaults to None, in which case no cache configuration is used.
82
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None, in which case the
83
+ name will be the component\'s class name followed by a unique identifier.
84
+
85
+ Returns:
86
+ ComponentStep: An instance of ComponentStep configured with the provided parameters.
87
+ '''
88
+ def log(message: str, is_template: bool = True, emit_kwargs: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, name: str | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None) -> LogStep:
89
+ '''Create a specialized step for logging messages.
90
+
91
+ This function creates a LogStep that logs messages within a pipeline.
92
+ It can be used to log status updates, debugging information, or any other text during pipeline execution.
93
+
94
+ The message can be a plain string or a template with placeholders for state variables.
95
+
96
+ Usage example 1 (plain message):
97
+ ```python
98
+ log_step = log("Processing document", is_template=False)
99
+ ```
100
+
101
+ Usage example 2 (template message with state variables):
102
+ ```python
103
+ log_step = log("Processing query: {query} with model: {model_name}")
104
+ ```
105
+
106
+ Args:
107
+ message (str): The message to be logged. May contain placeholders in curly braces for state variables.
108
+ is_template (bool, optional): Whether the message is a template with placeholders. Defaults to True.
109
+ emit_kwargs (dict[str, Any] | None, optional): Additional keyword arguments to pass to the event emitter.
110
+ Defaults to None.
111
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
112
+ Defaults to None, in which case no retry config is applied.
113
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
114
+ Defaults to None, in which case no cache store is used.
115
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
116
+ Defaults to None, in which case no cache configuration is used.
117
+ name (str | None, optional): A unique identifier for this pipeline step. If None, a name will be
118
+ auto-generated with the prefix "log_". Defaults to None.
119
+
120
+ Returns:
121
+ LogStep: A specialized pipeline step for logging messages.
122
+ '''
123
+ def if_else(condition: ConditionType | Callable[[dict[str, Any]], bool], if_branch: BasePipelineStep | list[BasePipelineStep], else_branch: BasePipelineStep | list[BasePipelineStep], input_state_map: dict[str, str] | None = None, output_state: str | None = None, runtime_config_map: dict[str, str] | None = None, fixed_args: dict[str, Any] | None = None, input_map: InputMapSpec | None = None, retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None, name: str | None = None) -> ConditionalStep:
124
+ '''Create a simple ConditionalStep with two branches.
125
+
126
+ This function creates a ConditionalStep that executes one of two branches based on a condition.
127
+
128
+ The condition can be either:
129
+ 1. A Component that must return exactly "true" or "false"
130
+ 2. A callable that returns a string ("true" or "false", case insensitive)
131
+ 3. A callable that returns a boolean (will be converted to "true"/"false")
132
+
133
+ For boolean conditions and string conditions, True/true/TRUE maps to the if_branch
134
+ and False/false/FALSE maps to the else_branch.
135
+
136
+ Usage example with a Callable condition:
137
+ ```python
138
+ # Using a Callable condition - receives merged state and config directly
139
+ condition = lambda data: data["input"] > data["threshold"]
140
+
141
+ if_branch = step(PositiveComponent(), {"input": "input"}, "output")
142
+ else_branch = step(NegativeComponent(), {"input": "input"}, "output")
143
+
144
+ if_else_step = if_else(
145
+ condition,
146
+ if_branch,
147
+ else_branch,
148
+ output_state="condition_result",
149
+ input_map={"threshold": Val(0)}
150
+ )
151
+
152
+ # or use the legacy approach via input_state_map, runtime_config_map, and fixed_args
153
+ Note: this approach is deprecated in v0.5. Please use input_map instead.
154
+ if_else_step = if_else(
155
+ condition,
156
+ if_branch,
157
+ else_branch,
158
+ output_state="condition_result",
159
+ fixed_args={"threshold": 0}
160
+ )
161
+ ```
162
+ This will cause the step to execute the PositiveComponent if the `input` in the pipeline state is greater than
163
+ the threshold (0), and the NegativeComponent otherwise. The result of the condition will be stored in the
164
+ pipeline state under the key `condition_result`.
165
+
166
+ Usage example with a Component condition:
167
+ ```python
168
+ # Using a Component condition - requires input_state_map and runtime_config_map
169
+ threshold_checker = ThresholdChecker() # A Component that returns "true" or "false"
170
+
171
+ if_branch = step(PositiveComponent(), {"input": "input"}, "output")
172
+ else_branch = step(NegativeComponent(), {"input": "input"}, "output")
173
+
174
+ if_else_step = if_else(
175
+ threshold_checker,
176
+ if_branch,
177
+ else_branch,
178
+ output_state="condition_result",
179
+ input_map={
180
+ "value": "input",
181
+ "threshold": "threshold_config",
182
+ "strict_mode": Val(True),
183
+ }
184
+ )
185
+
186
+ # or use the legacy approach via input_state_map, runtime_config_map, and fixed_args
187
+ Note: this approach is deprecated in v0.5. Please use input_map instead.
188
+ if_else_step = if_else(
189
+ threshold_checker,
190
+ if_branch,
191
+ else_branch,
192
+ input_state_map={"value": "input"},
193
+ output_state="condition_result",
194
+ runtime_config_map={"threshold": "threshold_config"},
195
+ fixed_args={"strict_mode": True}
196
+ )
197
+
198
+ ```
199
+ This will cause the step to execute the ThresholdChecker component with the `input` from the pipeline state
200
+ as its `value` parameter and the `threshold_config` from runtime configuration as its `threshold` parameter.
201
+ Based on the component\'s result ("true" or "false"), it will execute either the PositiveComponent or
202
+ the NegativeComponent.
203
+
204
+
205
+ Args:
206
+ condition (ConditionType | Callable[[dict[str, Any]], bool]): The condition to evaluate.
207
+ if_branch (BasePipelineStep | list[BasePipelineStep]): Step(s) to execute if condition is true.
208
+ else_branch (BasePipelineStep | list[BasePipelineStep]): Step(s) to execute if condition is false.
209
+ input_state_map (dict[str, str] | None, optional): Mapping of condition input arguments to pipeline state keys.
210
+ This is only used if the condition is a `Component`. If the condition is a `Callable`, it receives
211
+ a merged dictionary of the pipeline\'s state and config directly, and this parameter is ignored.
212
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
213
+ output_state (str | None, optional): Key to store the condition result in the pipeline state. Defaults to None.
214
+ runtime_config_map (dict[str, str] | None, optional): Mapping of condition input arguments to runtime
215
+ configuration keys. This is only used if the condition is a `Component`. If the condition is a `Callable`,
216
+ it receives a merged dictionary of the pipeline\'s state and config directly, and this parameter is ignored.
217
+ Defaults to None, in which case an empty dictionary is used. Will be deprecated in v0.5.
218
+ Please use input_map instead.
219
+ fixed_args (dict[str, Any] | None, optional): Fixed arguments to be passed to the condition. Defaults to None,
220
+ in which case an empty dictionary is used. Will be deprecated in v0.5. Please use input_map instead.
221
+ input_map (InputMapSpec, optional): Direct unified input map. If provided, input_state_map, runtime_config_map,
222
+ and fixed_args will be ignored; otherwise it will be synthesized from the input_state_map,
223
+ runtime_config_map, and fixed_args. Defaults to None.
224
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
225
+ Defaults to None, in which case no retry config is applied.
226
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
227
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
228
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
229
+ Defaults to None, in which case no cache store is used.
230
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
231
+ Defaults to None, in which case no cache configuration is used.
232
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None, in which case the
233
+ name will be "IfElse" followed by the condition\'s function name and a unique identifier.
234
+
235
+ Returns:
236
+ ConditionalStep: An instance of ConditionalStep configured with the provided parameters.
237
+ '''
238
+ def switch(condition: ConditionType, branches: dict[str, BasePipelineStep | list[BasePipelineStep]], input_state_map: dict[str, str] | None = None, output_state: str | None = None, default: BasePipelineStep | list[BasePipelineStep] | None = None, runtime_config_map: dict[str, str] | None = None, fixed_args: dict[str, Any] | None = None, input_map: InputMapSpec | None = None, retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None, name: str | None = None) -> ConditionalStep:
239
+ '''Create a ConditionalStep with multiple branches.
240
+
241
+ This function creates a ConditionalStep that can execute one of multiple branches based on a condition.
242
+
243
+ Usage example with a Callable condition:
244
+ ```python
245
+ # Using a Callable condition - receives merged state and config directly
246
+ def extract_command(data):
247
+ # Access both state and config in a single dictionary
248
+ query = data["query"]
249
+ separator = data["separator"] # From runtime config or state
250
+ return query.split(separator)[0]
251
+
252
+ branches = {
253
+ "search": step(SearchComponent(), {"query": "query"}, "search_result"),
254
+ "filter": step(FilterComponent(), {"query": "query"}, "filter_result"),
255
+ }
256
+ default = step(NoOpComponent(), {}, "no_op_result")
257
+
258
+ switch_step = switch(
259
+ extract_command,
260
+ branches,
261
+ input_map={"separator": Val(" ")}
262
+ output_state="command_type",
263
+ default=default,
264
+ )
265
+
266
+ # or use the legacy approach via input_state_map, runtime_config_map, and fixed_args
267
+ Note: this approach is deprecated in v0.5. Please use input_map instead.
268
+ switch_step = switch(
269
+ extract_command,
270
+ branches,
271
+ # input_state_map and runtime_config_map are ignored for Callable conditions
272
+ # but can still be specified (they will have no effect)
273
+ output_state="command_type",
274
+ default=default,
275
+ fixed_args={"separator": " "} # This will be merged with state and config
276
+ )
277
+ ```
278
+ This will cause the step to execute the SearchComponent if the first part of the `query` in the pipeline state
279
+ is "search", the FilterComponent if it is "filter", and the NoOpComponent otherwise. The separator is provided
280
+ as a fixed argument. The result of the condition will be stored in the pipeline state under the key
281
+ `command_type`.
282
+
283
+ Usage example with a Component condition:
284
+ ```python
285
+ # Using a Component condition - requires input_state_map and runtime_config_map
286
+ command_extractor = CommandExtractor() # A Component that extracts command from query
287
+
288
+ branches = {
289
+ "search": step(SearchComponent(), {"query": "query"}, "search_result"),
290
+ "filter": step(FilterComponent(), {"query": "query"}, "filter_result"),
291
+ "sort": step(SortComponent(), {"query": "query"}, "sort_result"),
292
+ }
293
+ default = step(DefaultComponent(), {"query": "query"}, "default_result")
294
+
295
+ switch_step = switch(
296
+ command_extractor,
297
+ branches,
298
+ input_map={"text": "query", "delimiter": "separator_config", "lowercase": Val(True)},
299
+ output_state="command_type",
300
+ default=default,
301
+ )
302
+
303
+ # or use the legacy approach via input_state_map, runtime_config_map, and fixed_args
304
+ Note: this approach is deprecated in v0.5. Please use input_map instead.
305
+ switch_step = switch(
306
+ command_extractor,
307
+ branches,
308
+ input_state_map={"text": "query"}, # Maps pipeline state to component input
309
+ output_state="command_type",
310
+ default=default,
311
+ runtime_config_map={"delimiter": "separator_config"}, # Maps runtime config to component input
312
+ fixed_args={"lowercase": True} # Fixed arguments passed directly to component
313
+ )
314
+ ```
315
+ This will cause the step to execute the CommandExtractor component with the `query` from the pipeline state
316
+ as its `text` parameter and the `separator_config` from runtime configuration as its `delimiter` parameter.
317
+ Based on the component\'s result (which should be one of "search", "filter", "sort", or something else),
318
+ it will execute the corresponding branch component or the default component.
319
+
320
+ Args:
321
+ condition (ConditionType): The condition to evaluate for branch selection.
322
+ branches (dict[str, BasePipelineStep | list[BasePipelineStep]]): Mapping of condition results to steps to
323
+ execute.
324
+ input_state_map (dict[str, str] | None, optional): Mapping of condition input arguments to pipeline state keys.
325
+ This is only used if the condition is a `Component`. If the condition is a `Callable`, it receives
326
+ a merged dictionary of the pipeline\'s state and config directly, and this parameter is ignored.
327
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
328
+ output_state (str | None, optional): Key to store the condition result in the pipeline state. Defaults to None.
329
+ default (BasePipelineStep | list[BasePipelineStep] | None, optional): Default branch to execute if no
330
+ condition matches. Defaults to None.
331
+ runtime_config_map (dict[str, str] | None, optional): Mapping of condition input arguments to runtime
332
+ configuration keys. This is only used if the condition is a `Component`. If the condition is a `Callable`,
333
+ it receives a merged dictionary of the pipeline\'s state and config directly, and this parameter is ignored.
334
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
335
+ fixed_args (dict[str, Any] | None, optional): Fixed arguments to be passed to the condition. Defaults to None.
336
+ Will be deprecated in v0.5. Please use input_map instead.
337
+ input_map (InputMapSpec, optional): Direct unified input map. If provided, input_state_map, runtime_config_map,
338
+ and fixed_args will be ignored; otherwise it will be synthesized from the input_state_map,
339
+ runtime_config_map, and fixed_args. Defaults to None.
340
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
341
+ Defaults to None, in which case no retry config is applied.
342
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
343
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
344
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
345
+ Defaults to None, in which case no cache store is used.
346
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
347
+ Defaults to None, in which case no cache configuration is used.
348
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None, in which case the
349
+ name will be "Switch" followed by the condition\'s function name and a unique identifier.
350
+
351
+ Returns:
352
+ ConditionalStep: An instance of ConditionalStep configured with the provided parameters.
353
+ '''
354
+ def transform(operation: Callable[[dict[str, Any]], Any], input_states: list[str] | None = None, output_state: str | list[str] | None = None, runtime_config_map: dict[str, str] | None = None, fixed_args: dict[str, Any] | None = None, input_map: InputMapSpec | None = None, retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None, name: str | None = None) -> StateOperatorStep:
355
+ '''Create a StateOperatorStep for transforming state data.
356
+
357
+ This function creates a StateOperatorStep that applies a transformation operation to the pipeline state.
358
+ Note that the function `operation` should accept a dictionary of input data and return the operation result.
359
+
360
+ Usage example:
361
+ ```python
362
+ def sort(data: dict) -> dict:
363
+ is_reverse = data["reverse"]
364
+ data["chunk"] = sorted(data["chunk"], reverse=is_reverse)
365
+
366
+ transform_step = transform(
367
+ operation=sort,
368
+ input_map=["chunk", {"reverse": Val(True)}],
369
+ output_state="output",
370
+ )
371
+
372
+ # or use the legacy approach via input_states, runtime_config_map, and fixed_args
373
+ transform_step = transform(
374
+ operation=sort,
375
+ input_states=["chunk"],
376
+ output_state="output",
377
+ runtime_config_map={"reverse": "reverse_config"},
378
+ fixed_args={"reverse": True},
379
+ )
380
+ ```
381
+ This will cause the step to execute the `sort` operation on the `chunk` in the pipeline state. The result will
382
+ be stored in the pipeline state under the key `output`. The behavior is controlled by the runtime configuration
383
+ key `reverse`.
384
+
385
+ Args:
386
+ operation (Callable[[dict[str, Any]], Any]): The operation to execute on the input data.
387
+ input_states (list[str] | None, optional): List of input state keys required by the operation.
388
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
389
+
390
+ output_state (str | list[str]): Key(s) to store the operation result in the pipeline state.
391
+ runtime_config_map (dict[str, str] | None, optional): Mapping of operation input arguments to runtime
392
+ configuration keys. Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
393
+ fixed_args (dict[str, Any] | None, optional): Fixed arguments to be passed to the operation. Defaults to None.
394
+ Will be deprecated in v0.5. Please use input_map instead.
395
+ input_map (InputMapSpec, optional): Direct unified input map. If provided, input_states, runtime_config_map,
396
+ and fixed_args will be ignored; otherwise it will be synthesized from the input_states,
397
+ runtime_config_map, and fixed_args. Defaults to None.
398
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
399
+ Defaults to None, in which case no retry config is applied.
400
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
401
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
402
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
403
+ Defaults to None, in which case no cache store is used.
404
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
405
+ Defaults to None, in which case no cache configuration is used.
406
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None, in which case the
407
+ name will be "Transform" followed by the operation\'s function name and a unique identifier.
408
+
409
+ Returns:
410
+ StateOperatorStep: An instance of StateOperatorStep configured with the provided parameters.
411
+ '''
412
+ def bundle(input_states: list[str] | dict[str, str], output_state: str | list[str], retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None, name: str | None = None) -> StateOperatorStep:
413
+ '''Create a StateOperatorStep to combine multiple state keys.
414
+
415
+ This function creates a StateOperatorStep that combines multiple keys from the pipeline state into a single output
416
+ without modifying the data.
417
+
418
+ Usage example:
419
+ ```python
420
+ bundle_step = bundle(["input1", "input2"], "output")
421
+ # Produces: {"output": {"input1": state["input1"], "input2": state["input2"]}}
422
+ ```
423
+ This will cause the step to bundle the values of `input1` and `input2` from the pipeline state into a single
424
+ dictionary. The result will be stored in the pipeline state under the key `output`.
425
+
426
+ Usage example (with remapping):
427
+ ```python
428
+ # Provide a mapping of desired output field names to source state keys
429
+ # Renames state key "user_id" to "id" in the bundled output
430
+ bundle_step = bundle({"id": "user_id"}, "output")
431
+ # Produces: {"output": {"id": state["user_id"]}}
432
+ ```
433
+
434
+ Args:
435
+ input_states (list[str] | dict[str, str]):
436
+ 1. If a list is provided, the listed state keys are bundled as-is (identity mapping).
437
+ 2. If a dict is provided, it is treated as a mapping of output field names to source state keys.
438
+ The bundled result will use the dict keys as field names.
439
+ output_state (str | list[str]): Key(s) to store the bundled data in the pipeline state.
440
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
441
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
442
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
443
+ Defaults to None, in which case no retry config is applied.
444
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
445
+ Defaults to None, in which case no cache store is used.
446
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
447
+ Defaults to None, in which case no cache configuration is used.
448
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None, in which case the
449
+ name will be "Bundle" followed by a unique identifier.
450
+
451
+ Returns:
452
+ StateOperatorStep: An instance of StateOperatorStep configured to bundle the input states.
453
+ '''
454
+ def guard(condition: ConditionType | Callable[[dict[str, Any]], bool], success_branch: BasePipelineStep | list[BasePipelineStep], failure_branch: BasePipelineStep | list[BasePipelineStep] | None = None, input_state_map: dict[str, str] | None = None, output_state: str | None = None, runtime_config_map: dict[str, str] | None = None, fixed_args: dict[str, Any] | None = None, input_map: InputMapSpec | None = None, retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None, name: str | None = None) -> GuardStep:
455
+ '''Create a GuardStep with a concise syntax.
456
+
457
+ This function creates a GuardStep that can terminate pipeline execution if a condition is not met.
458
+
459
+ Usage example:
460
+ ```python
461
+ auth_check = lambda state: state["is_authenticated"]
462
+ success_step = step(SuccessHandler(), {"input": "input"}, "output")
463
+ error_step = step(ErrorHandler(), {"error": "auth_error"}, "error_message")
464
+
465
+
466
+ guard_step = guard(
467
+ auth_check,
468
+ success_branch=success_step,
469
+ failure_branch=error_step,
470
+ input_map={"user_id": "current_user", "model": "auth_model", "strict_mode": Val(True)},
471
+ output_state="auth_result",
472
+ )
473
+
474
+ # or use the legacy approach via input_state_map, runtime_config_map, and fixed_args
475
+ Note: this approach is deprecated in v0.5. Please use input_map instead.
476
+ guard_step = guard(
477
+ auth_check,
478
+ success_branch=success_step,
479
+ failure_branch=error_step,
480
+ input_state_map={"user_id": "current_user"},
481
+ runtime_config_map={"model": "auth_model"},
482
+ fixed_args={"strict_mode": True},
483
+ output_state="auth_result"
484
+ )
485
+ ```
486
+
487
+ Args:
488
+ condition (ConditionType | Callable[[dict[str, Any]], bool]): The condition to evaluate.
489
+ success_branch (BasePipelineStep | list[BasePipelineStep]): Steps to execute if condition is True.
490
+ failure_branch (BasePipelineStep | list[BasePipelineStep] | None, optional): Steps to execute if condition
491
+ is False. If None, pipeline terminates immediately. Defaults to None.
492
+ input_state_map (dict[str, str] | None, optional): Mapping of condition input arguments to pipeline state keys.
493
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
494
+ output_state (str | None, optional): Key to store the condition result in the pipeline state. Defaults to None.
495
+ runtime_config_map (dict[str, str] | None, optional): Mapping of condition input arguments to runtime
496
+ configuration keys. Defaults to None, in which case an empty dictionary is used.
497
+ Will be deprecated in v0.5. Please use input_map instead.
498
+ fixed_args (dict[str, Any] | None, optional): Fixed arguments to be passed to the condition. Defaults to None,
499
+ in which case an empty dictionary is used. Will be deprecated in v0.5. Please use input_map instead.
500
+ input_map (InputMapSpec | None, optional): Direct unified input map. If provided, input_state_map, runtime_config_map,
501
+ and fixed_args will be ignored; otherwise it will be synthesized from the input_state_map,
502
+ runtime_config_map, and fixed_args. Defaults to None.
503
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
504
+ Defaults to None, in which case no retry config is applied.
505
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
506
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
507
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
508
+ Defaults to None, in which case no cache store is used.
509
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
510
+ Defaults to None, in which case no cache configuration is used.
511
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None, in which case the
512
+ name will be "Guard" followed by the condition\'s function name and a unique identifier.
513
+
514
+ Returns:
515
+ GuardStep: An instance of GuardStep configured with the provided parameters.
516
+ '''
517
+ def terminate(name: str | None = None, retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None) -> TerminatorStep:
518
+ '''Create a TerminatorStep to end pipeline execution.
519
+
520
+ This function creates a TerminatorStep that explicitly terminates a branch or the entire pipeline.
521
+
522
+ Usage example:
523
+ ```python
524
+ early_exit = terminate("early_exit")
525
+
526
+ pipeline = (
527
+ step_a
528
+ | if_else(should_stop, early_exit, step_b)
529
+ | step_c
530
+ )
531
+ ```
532
+
533
+ Args:
534
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None, in which case the
535
+ name will be "Terminator" followed by a unique identifier.
536
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
537
+ Defaults to None, in which case no retry config is applied.
538
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
539
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
540
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
541
+ Defaults to None, in which case no cache store is used.
542
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
543
+ Defaults to None, in which case no cache configuration is used.
544
+
545
+ Returns:
546
+ TerminatorStep: An instance of TerminatorStep.
547
+ '''
548
+ def no_op(name: str | None = None) -> NoOpStep:
549
+ '''Create a NoOpStep to add a step that does nothing.
550
+
551
+ This function creates a NoOpStep that does nothing.
552
+
553
+ Args:
554
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None, in which case the
555
+ name will be "NoOp" followed by a unique identifier.
556
+
557
+ Returns:
558
+ NoOpStep: An instance of NoOpStep.
559
+ '''
560
+ def toggle(condition: ConditionType | Callable[[dict[str, Any]], bool] | str, if_branch: BasePipelineStep | list[BasePipelineStep], input_state_map: dict[str, str] | None = None, output_state: str | None = None, runtime_config_map: dict[str, str] | None = None, fixed_args: dict[str, Any] | None = None, input_map: InputMapSpec | None = None, retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None, name: str | None = None) -> ConditionalStep:
561
+ '''Create a ConditionalStep that toggles between a branch and a no-op.
562
+
563
+ This function creates a ConditionalStep that executes a branch if the condition is true,
564
+ and does nothing (no-op) if the condition is false.
565
+
566
+ The condition can be:
567
+ 1. A Component that must return exactly "true" or "false"
568
+ 2. A callable that returns a string ("true" or "false", case insensitive)
569
+ 3. A callable that returns a boolean (will be converted to "true"/"false")
570
+ 4. A string key that will be looked up in the merged state data (state + runtime config + fixed args).
571
+ The value will be evaluated for truthiness - any non-empty, non-zero, non-False value will be considered True.
572
+
573
+ Usage example with a Callable condition:
574
+ ```python
575
+ # Using a Callable condition - receives merged state and config directly
576
+ condition = lambda data: data["feature_enabled"] and data["user_tier"] >= 2
577
+ feature_step = step(FeatureComponent(), {"input": "input"}, "output")
578
+
579
+ toggle_step = toggle(
580
+ condition,
581
+ feature_step,
582
+ output_state="feature_status",
583
+ input_map={"user_tier": Val(2)},
584
+ )
585
+
586
+ # or use the legacy approach via input_state_map, runtime_config_map, and fixed_args
587
+ Note: this approach is deprecated in v0.5. Please use input_map instead.
588
+ toggle_step = toggle(
589
+ condition,
590
+ feature_step,
591
+ output_state="feature_status",
592
+ fixed_args={"user_tier": 2} # This will be merged with state and config
593
+ )
594
+ ```
595
+ This will execute the FeatureComponent only if both `feature_enabled` is true and `user_tier` is at least 2.
596
+ Otherwise, it will do nothing. The condition result will be stored in the pipeline state under the key
597
+ `feature_status`.
598
+
599
+ Usage example with a Component condition:
600
+ ```python
601
+ # Using a Component condition - requires input_state_map and runtime_config_map
602
+ feature_checker = FeatureChecker() # A Component that returns "true" or "false"
603
+ feature_step = step(FeatureComponent(), {"input": "input"}, "output")
604
+
605
+ toggle_step = toggle(
606
+ feature_checker,
607
+ feature_step,
608
+ output_state="feature_status",
609
+ input_map={"user_id": "current_user", "feature_name": "target_feature", "check_permissions": Val(True)},
610
+ )
611
+
612
+ # or use the legacy approach via input_state_map, runtime_config_map, and fixed_args
613
+ Note: this approach is deprecated in v0.5. Please use input_map instead.
614
+ toggle_step = toggle(
615
+ feature_checker,
616
+ feature_step,
617
+ input_state_map={"user_id": "current_user"}, # Maps pipeline state to component input
618
+ output_state="feature_status",
619
+ runtime_config_map={"feature_name": "target_feature"}, # Maps runtime config to component input
620
+ fixed_args={"check_permissions": True} # Fixed arguments passed directly to component
621
+ )
622
+ ```
623
+ This will cause the step to execute the FeatureChecker component with the `current_user` from the pipeline state
624
+ as its `user_id` parameter and the `target_feature` from runtime configuration as its `feature_name` parameter.
625
+ Based on the component\'s result ("true" or "false"), it will either execute the FeatureComponent or do nothing.
626
+
627
+
628
+ Args:
629
+ condition (ConditionType | Callable[[dict[str, Any]], bool] | str): The condition to evaluate.
630
+ if_branch (BasePipelineStep | list[BasePipelineStep]): Step(s) to execute if condition is true.
631
+ input_state_map (dict[str, str] | None, optional): Mapping of condition input arguments to pipeline state keys.
632
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
633
+ output_state (str | None, optional): Key to store the condition result in the pipeline state. Defaults to None.
634
+ runtime_config_map (dict[str, str] | None, optional): Mapping of condition input arguments to runtime
635
+ configuration keys. Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
636
+ fixed_args (dict[str, Any] | None, optional): Fixed arguments to be passed to the condition. Defaults to None.
637
+ Will be deprecated in v0.5. Please use input_map instead.
638
+ input_map (InputMapSpec | None, optional): Direct unified input map. If provided, it is used
639
+ directly; otherwise it will be synthesized from maps. Defaults to None.
640
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
641
+ Defaults to None, in which case no retry config is applied.
642
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
643
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
644
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
645
+ Defaults to None, in which case no cache store is used.
646
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
647
+ Defaults to None, in which case no cache configuration is used.
648
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None, in which case the
649
+ name will be "Toggle" followed by a unique identifier.
650
+
651
+ Returns:
652
+ ConditionalStep: An instance of ConditionalStep configured with the provided parameters.
653
+ '''
654
+ def subgraph(subgraph: Pipeline, input_state_map: dict[str, str] | None = None, output_state_map: dict[str, str] | None = None, runtime_config_map: dict[str, str] | None = None, fixed_args: dict[str, Any] | None = None, input_map: InputMapSpec | None = None, retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None, name: str | None = None) -> SubgraphStep:
655
+ '''Create a SubgraphStep that executes another pipeline as a subgraph.
656
+
657
+ This function creates a SubgraphStep that allows for encapsulation and reuse of pipeline logic by treating
658
+ another pipeline as a step. The subgraph can have its own state schema, and this step handles the mapping
659
+ between the parent and subgraph states.
660
+
661
+ The SubgraphStep gracefully handles missing state keys - if a key specified in input_state_map is not present
662
+ in the parent state, it will be omitted from the subgraph input rather than causing an error. This allows
663
+ for flexible composition of pipelines with different state schemas.
664
+
665
+ Usage example:
666
+ ```python
667
+ from typing import TypedDict
668
+ from gllm_pipeline.pipeline.pipeline import Pipeline
669
+
670
+ # Define state schemas using TypedDict
671
+ class SubgraphState(TypedDict):
672
+ query: str
673
+ retrieved_data: list
674
+ reranked_data: list
675
+
676
+ class ParentState(TypedDict):
677
+ user_input: str
678
+ query: str
679
+ reranked: list
680
+ response: str
681
+
682
+ # Define a subgraph pipeline with its own state schema
683
+ subgraph_pipeline = Pipeline(
684
+ [
685
+ step(Retriever(), {"query": "query"}, "retrieved_data"),
686
+ step(Reranker(), {"data": "retrieved_data"}, "reranked_data")
687
+ ],
688
+ state_type=SubgraphState
689
+ )
690
+
691
+ # Use the subgraph in a parent pipeline
692
+ parent_pipeline = Pipeline(
693
+ [
694
+ step(QueryProcessor(), {"input": "user_input"}, "query"),
695
+ subgraph(
696
+ subgraph_pipeline,
697
+ input_map={"query": "query", "model": "retrieval_model", "top_k": Val(10)},
698
+ output_state_map={"reranked": "reranked_data"},
699
+ ),
700
+ step(ResponseGenerator(), {"data": "reranked"}, "response")
701
+ ],
702
+ state_type=ParentState
703
+ )
704
+
705
+ # or use the legacy approach via input_state_map, output_state_map, runtime_config_map, and fixed_args
706
+ parent_pipeline = Pipeline(
707
+ [
708
+ step(QueryProcessor(), {"input": "user_input"}, "query"),
709
+ subgraph(
710
+ subgraph_pipeline,
711
+ input_state_map={"query": "query"}, # Map parent state to subgraph input
712
+ output_state_map={"reranked": "reranked_data"}, # Map subgraph output to parent state
713
+ runtime_config_map={"model": "retrieval_model"},
714
+ fixed_args={"top_k": 10},
715
+ ),
716
+ step(ResponseGenerator(), {"data": "reranked"}, "response")
717
+ ],
718
+ state_type=ParentState
719
+ )
720
+
721
+ # When the parent pipeline runs:
722
+ # 1. QueryProcessor processes user_input and produces query
723
+ # 2. SubgraphStep creates a new state for the subgraph with query from parent
724
+ # 3. Subgraph executes its steps (Retriever → Reranker)
725
+ # 4. SubgraphStep maps reranked_data from subgraph to reranked in parent
726
+ # 5. ResponseGenerator uses reranked to produce response
727
+ ```
728
+
729
+ Args:
730
+ subgraph (Pipeline): The pipeline to be executed as a subgraph.
731
+ input_state_map (dict[str, str] | None, optional): Mapping of subgraph input keys to parent pipeline state keys.
732
+ Keys that don\'t exist in the parent state will be gracefully ignored. If None, all subgraph inputs will be
733
+ passed as-is. Will be deprecated in v0.5. Please use input_map instead.
734
+ output_state_map (dict[str, str] | None, optional): Mapping of parent pipeline state keys to subgraph
735
+ output keys. If None, all subgraph outputs will be passed as-is.
736
+ runtime_config_map (dict[str, str] | None, optional): Mapping of subgraph input keys to runtime
737
+ configuration keys. Defaults to None, in which case an empty dictionary is used.
738
+ Will be deprecated in v0.5. Please use input_map instead.
739
+ fixed_args (dict[str, Any] | None, optional): Fixed arguments to be passed to the subgraph.
740
+ Defaults to None, in which case an empty dictionary is used. Will be deprecated in v0.5.
741
+ Please use input_map instead.
742
+ input_map (InputMapSpec | None, optional): Direct unified input map. If provided, it is used
743
+ directly; otherwise it will be synthesized from maps. Defaults to None.
744
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
745
+ Defaults to None, in which case no retry config is applied.
746
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
747
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
748
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
749
+ Defaults to None, in which case no cache store is used.
750
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
751
+ Defaults to None, in which case no cache configuration is used.
752
+ name (str | None, optional): A unique identifier for this pipeline step. Defaults to None,
753
+ in which case the name will be "Subgraph" followed by a unique identifier.
754
+
755
+ Returns:
756
+ SubgraphStep: An instance of SubgraphStep configured with the provided parameters.
757
+ '''
758
+ def parallel(branches: list[PipelineSteps] | dict[str, PipelineSteps], input_states: list[str] | None = None, squash: bool = True, runtime_config_map: dict[str, str] | None = None, fixed_args: dict[str, Any] | None = None, input_map: InputMapSpec | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None, retry_config: RetryConfig | None = None, name: str | None = None) -> ParallelStep:
759
+ '''Create a ParallelStep that executes multiple branches concurrently.
760
+
761
+ This function creates a ParallelStep that runs multiple branches in parallel and merges their results.
762
+ Each branch can be a single step or a list of steps to execute sequentially.
763
+
764
+ The step supports two execution modes controlled by the `squash` parameter:
765
+ 1. Squashed (default): Uses asyncio.gather() to run branches in parallel within a single LangGraph node. Use for:
766
+ a. Better raw performance
767
+ b. Simpler implementation
768
+ c. Less overhead
769
+ d. Less transparent for debugging and tracing
770
+ 2. Expanded (squash=False): Creates a native LangGraph structure with multiple parallel paths. Use for:
771
+ a. More native LangGraph integration
772
+ b. More transparent for debugging and tracing
773
+
774
+ For memory optimization, you can specify input_states to pass only specific keys to branches.
775
+ This is especially useful when the state is large but branches only need specific parts of it.
776
+ If input_states is None (default), all state keys will be passed.
777
+
778
+ Usage example:
779
+ 1. Define branches as a list of steps or lists of steps
780
+ ```python
781
+ parallel_step = parallel(
782
+ branches=[
783
+ step(ComponentA(), {"input": "query"}, "output_a"),
784
+ [
785
+ step(ComponentB1(), {"input": "query"}, "output_b1"),
786
+ step(ComponentB2(), {"input": "output_b1"}, "output_b2")
787
+ ],
788
+ step(ComponentC(), {"input": "query"}, "output_c")
789
+ ],
790
+ input_states=["query"], # Only \'query\' will be passed to branches
791
+ )
792
+ ```
793
+
794
+ 2. Define branches as a dictionary of branches
795
+ Other than the list format, we can also use the dictionary format for branches to
796
+ make it easier to exclude branches.
797
+ ```python
798
+ parallel_step = parallel(
799
+ branches={
800
+ "branch_a": step(ComponentA(), {"input": "query"}, "output_a"),
801
+ "branch_b": step(ComponentB(), {"input": "query"}, "output_b"),
802
+ },
803
+ input_states=["query"],
804
+ )
805
+ ```
806
+
807
+ Args:
808
+ branches
809
+ (list[PipelineSteps] | dict[str, PipelineSteps]):
810
+ Branches to execute in parallel. Each branch can be a single step
811
+ or a list of steps to execute sequentially. Can be either a list or a dictionary.
812
+ input_states (list[str] | None, optional): Keys from the state to pass to branches.
813
+ Defaults to None, in which case all state keys will be passed.
814
+ Will be deprecated in v0.5. Please use input_map instead.
815
+ squash (bool, optional): Whether to squash execution into a single node.
816
+ If True, uses asyncio.gather() to run branches in parallel.
817
+ If False, uses native LangGraph structures for parallelism.
818
+ Defaults to True.
819
+ runtime_config_map (dict[str, str] | None, optional): Mapping of input keys to runtime config keys.
820
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
821
+ fixed_args (dict[str, Any] | None, optional): Fixed arguments to include in the state passed to branches.
822
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
823
+ input_map (InputMapSpec | None, optional): Direct unified input map. If provided, input_states, runtime_config_map,
823
+ and fixed_args will be ignored; otherwise it will be synthesized from the input_states,
824
+ runtime_config_map, and fixed_args. Defaults to None.
826
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
827
+ Defaults to None, in which case no retry config is applied.
828
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
829
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
830
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
831
+ Defaults to None, in which case no cache store is used.
832
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
833
+ Defaults to None, in which case no cache configuration is used.
834
+ name (str | None, optional): A unique identifier for this parallel step.
835
+ Defaults to None. In this case, a name will be auto-generated.
836
+
837
+ Returns:
838
+ ParallelStep: An instance of ParallelStep configured with the provided branches.
839
+ '''
840
+ def map_reduce(output_state: str, map_func: Component | Callable[[dict[str, Any]], Any], input_state_map: dict[str, str] | None = None, reduce_func: Callable[[list[Any]], Any] = ..., runtime_config_map: dict[str, str] | None = None, fixed_args: dict[str, Any] | None = None, input_map: InputMapSpec | None = None, retry_config: RetryConfig | None = None, error_handler: BaseStepErrorHandler | None = None, cache_store: BaseCache | None = None, cache_config: dict[str, Any] | None = None, name: str | None = None) -> MapReduceStep:
841
+ '''Create a MapReduceStep that maps a function over multiple inputs and reduces the results.
842
+
843
+ This function creates a step that applies a mapping function to multiple inputs in parallel
844
+ and combines the results using a reduction function.
845
+
846
+ The `map_func` receives a dictionary for each item being processed. This dictionary contains:
847
+ 1. Values from `input_state_map` (with list inputs split into individual items).
848
+ 2. Values from `runtime_config_map` (if provided).
849
+ 3. Values from `fixed_args` (if provided).
850
+
851
+ The `map_func` can be either:
852
+ 1. A callable function that takes a dictionary as input and returns a result.
853
+ 2. A `Component` instance, which will be executed with proper async handling.
854
+
855
+ Important note on parallel execution:
856
+ 1. For true parallelism, the `map_func` MUST be an async function or a `Component`.
857
+ 2. Synchronous map functions will block the event loop and run sequentially.
858
+
859
+ The step supports automatic broadcasting of scalar values and handles lists appropriately:
860
+ 1. If multiple list inputs are provided, they must be the same length.
861
+ 2. Scalar inputs are broadcasted to match list lengths.
862
+
863
+ Usage Example - Processing a list of items with an async map function:
864
+ ```python
865
+ async def count_words(item):
866
+ await asyncio.sleep(0.1) # Simulate I/O operation
867
+ return len(item["document"].split())
868
+
869
+ process_docs = map_reduce(
870
+ input_state_map={
871
+ "document": "documents" # A list, e.g. ["doc1...", "doc2...", "doc3..."]
872
+ },
873
+ output_state="word_counts", # A list of word counts for each document
874
+ map_func=count_words,
875
+ reduce_func=lambda results: sum(results), # Sum word counts
876
+ )
877
+
878
+ # When executed with {"documents": ["doc1...", "doc2...", "doc3..."]},
879
+ # returns {"word_counts": 60} (total word count)
880
+ ```
881
+
882
+ Usage Example - Broadcasting scalar values to match list length:
883
+ ```python
884
+ # Apply a common threshold to multiple values
885
+ threshold_check = map_reduce(
886
+ input_state_map={
887
+ "value": "values", # A list: [5, 10, 15]
888
+ "threshold": "threshold", # A scalar: 8 (will be broadcast)
889
+ },
890
+ output_state="above_threshold",
891
+ map_func=lambda item: item["value"] > item["threshold"],
892
+ reduce_func=lambda results: results # Return list of boolean results
893
+ )
894
+ # When executed with {"values": [5, 10, 15], "threshold": 8},
895
+ # returns {"above_threshold": [False, True, True]}
896
+ ```
897
+
898
+ Usage Example - Multiple list inputs with the same length:
899
+ ```python
900
+ similarity_step = map_reduce(
901
+ input_state_map={
902
+ "doc1": "documents_a", # ["doc1", "doc2", "doc3"]
903
+ "doc2": "documents_b", # ["docA", "docB", "docC"]
904
+ },
905
+ output_state="similarity_scores",
906
+ map_func=lambda item: calculate_similarity(item["doc1"], item["doc2"]),
907
+ reduce_func=lambda results: sum(results) / len(results) # Average similarity
908
+ )
909
+ # When executed with {"documents_a": ["doc1", "doc2", "doc3"], "documents_b": ["docA", "docB", "docC"]},
910
+ # returns {"similarity_scores": 0.75}
911
+ ```
912
+
913
+ Usage Example - Using a Component for complex processing instead of a map function:
914
+ ```python
915
+ summarizer = TextSummarizer() # Subclass of Component
916
+ summarize_step = map_reduce(
917
+ input_state_map={
918
+ "text": "documents", # List of documents to summarize
919
+ "max_length": "max_length", # Scalar parameter (broadcasted)
920
+ },
921
+ output_state="summaries",
922
+ map_func=summarizer,
923
+ reduce_func=lambda results: [r["summary"] for r in results]
924
+ )
925
+ # When executed with {"documents": ["doc1...", "doc2..."], "max_length": 50},
926
+ # returns {"summaries": ["summary1...", "summary2..."]}
927
+ ```
928
+
929
+ Args:
930
+ output_state (str): Key to store the reduced result in the pipeline state.
931
+ map_func (Component | Callable[[dict[str, Any]], Any]): Function to apply to each input item.
932
+ The map function receives a dictionary containing the input values derived from input_state_map,
933
+ runtime_config_map, and fixed_args.
934
+ reduce_func (Callable[[list[Any]], Any], optional): Function to reduce the mapped results.
935
+ Defaults to a function that returns the list of results as is.
936
+ input_state_map (dict[str, str] | None, optional): Mapping of function arguments to pipeline state keys.
937
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
938
+ runtime_config_map (dict[str, str] | None, optional): Mapping of arguments to runtime config keys.
939
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
940
+ fixed_args (dict[str, Any] | None, optional): Fixed arguments to pass to the functions.
941
+ Defaults to None. Will be deprecated in v0.5. Please use input_map instead.
942
+ input_map (InputMapSpec | None, optional): Direct unified input map. If provided, input_state_map, runtime_config_map,
943
+ and fixed_args will be ignored; otherwise it will be synthesized from the input_state_map,
944
+ runtime_config_map, and fixed_args. Defaults to None.
945
+ retry_config (RetryConfig | None, optional): Configuration for retry behavior using GLLM Core\'s RetryConfig.
946
+ Defaults to None, in which case no retry config is applied.
947
+ error_handler (BaseStepErrorHandler | None, optional): Strategy to handle errors during execution.
948
+ Defaults to None, in which case the RaiseStepErrorHandler is used.
949
+ cache_store ("BaseCache" | None, optional): Cache store to be used for caching.
950
+ Defaults to None, in which case no cache store is used.
951
+ cache_config (dict[str, Any] | None, optional): Cache configuration to be used for caching.
952
+ Defaults to None, in which case no cache configuration is used.
953
+ name (str | None, optional): A unique identifier for this step. Defaults to None, in which case the name will be
954
+ "MapReduce" followed by the map function name.
955
+
956
+ Returns:
957
+ MapReduceStep: An instance of MapReduceStep configured with the provided parameters.
958
+ '''