sdg-hub 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +16 -3
- sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +175 -416
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +174 -415
- sdg_hub/core/blocks/evaluation/verify_question_block.py +180 -415
- sdg_hub/core/blocks/llm/__init__.py +2 -0
- sdg_hub/core/blocks/llm/client_manager.py +61 -24
- sdg_hub/core/blocks/llm/config.py +1 -0
- sdg_hub/core/blocks/llm/llm_chat_block.py +62 -7
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +653 -0
- sdg_hub/core/blocks/llm/text_parser_block.py +75 -30
- sdg_hub/core/blocks/registry.py +49 -35
- sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
- sdg_hub/core/flow/base.py +370 -20
- sdg_hub/core/flow/checkpointer.py +333 -0
- sdg_hub/core/flow/metadata.py +45 -0
- sdg_hub/core/flow/migration.py +12 -1
- sdg_hub/core/flow/registry.py +121 -58
- sdg_hub/core/flow/validation.py +12 -0
- sdg_hub/core/utils/__init__.py +2 -1
- sdg_hub/core/utils/datautils.py +81 -1
- sdg_hub/core/utils/flow_id_words.yaml +231 -0
- sdg_hub/core/utils/flow_identifier.py +94 -0
- sdg_hub/core/utils/yaml_utils.py +59 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +1 -7
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/METADATA +59 -31
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/RECORD +30 -25
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/WHEEL +0 -0
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/top_level.txt +0 -0
sdg_hub/core/flow/base.py
CHANGED
@@ -4,19 +4,28 @@
 # Standard
 from pathlib import Path
 from typing import Any, Optional, Union
+import time
 
 # Third Party
 from datasets import Dataset
 from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+from rich.tree import Tree
+import datasets
 import yaml
 
 # Local
 from ..blocks.base import BaseBlock
 from ..blocks.registry import BlockRegistry
+from ..utils.datautils import safe_concatenate_with_validation, validate_no_duplicates
 from ..utils.error_handling import EmptyDatasetError, FlowValidationError
 from ..utils.logger_config import setup_logger
 from ..utils.path_resolution import resolve_path
-from .
+from ..utils.yaml_utils import save_flow_yaml
+from .checkpointer import FlowCheckpointer
+from .metadata import DatasetRequirements, FlowMetadata, FlowParameter
 from .migration import FlowMigration
 from .validation import FlowValidator
 
@@ -133,7 +142,17 @@ class Flow(BaseModel):
         -------
         Flow
             Validated Flow instance.
+
+        Raises
+        ------
+        FlowValidationError
+            If yaml_path is None or the file doesn't exist.
         """
+        if yaml_path is None:
+            raise FlowValidationError(
+                "Flow path cannot be None. Please provide a valid YAML file path or check that the flow exists in the registry."
+            )
+
         yaml_path = resolve_path(yaml_path, [])
         yaml_dir = Path(yaml_path).parent
 
@@ -160,6 +179,8 @@ class Flow(BaseModel):
             flow_config, migrated_runtime_params = FlowMigration.migrate_to_new_format(
                 flow_config, yaml_path
             )
+            # Save migrated config back to YAML to persist id
+            save_flow_yaml(yaml_path, flow_config, "migrated to new format")
 
         # Validate YAML structure
         validator = FlowValidator()
@@ -221,6 +242,17 @@ class Flow(BaseModel):
         # Create and validate the flow
         try:
             flow = cls(blocks=blocks, metadata=metadata, parameters=parameters)
+            # Persist generated id back to the YAML file (only on initial load)
+            # If the file had no metadata.id originally, update and rewrite
+            if not flow_config.get("metadata", {}).get("id"):
+                flow_config.setdefault("metadata", {})["id"] = flow.metadata.id
+                save_flow_yaml(
+                    yaml_path,
+                    flow_config,
+                    f"added generated id: {flow.metadata.id}",
+                )
+            else:
+                logger.debug(f"Flow already had id: {flow.metadata.id}")
             # Store migrated runtime params and client for backward compatibility
             if migrated_runtime_params:
                 flow._migrated_runtime_params = migrated_runtime_params
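
The two hunks above make the generated flow id durable: migrating an old-format YAML and loading a file that lacks metadata.id both rewrite the file through save_flow_yaml. A minimal sketch of the observable behaviour, assuming a flow YAML that initially has no metadata.id (the path and the generated value are illustrative):

    from sdg_hub.core.flow.base import Flow

    # First load generates an id and persists it back into my_flow.yaml
    flow = Flow.from_yaml("my_flow.yaml")
    generated_id = flow.metadata.id

    # Subsequent loads reuse the persisted id instead of generating a new one
    assert Flow.from_yaml("my_flow.yaml").metadata.id == generated_id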
@@ -275,13 +307,11 @@ class Flow(BaseModel):
 
         # Get block class from registry
         try:
-            block_class = BlockRegistry.
+            block_class = BlockRegistry._get(block_type_name)
         except KeyError as exc:
             # Get all available blocks from all categories
-            all_blocks = BlockRegistry.
-            available_blocks = ", ".join(
-                [block for blocks in all_blocks.values() for block in blocks]
-            )
+            all_blocks = BlockRegistry.list_blocks()
+            available_blocks = ", ".join(all_blocks)
             raise FlowValidationError(
                 f"Block type '{block_type_name}' not found in registry. "
                 f"Available blocks: {available_blocks}"
@@ -324,6 +354,9 @@ class Flow(BaseModel):
         self,
         dataset: Dataset,
         runtime_params: Optional[dict[str, dict[str, Any]]] = None,
+        checkpoint_dir: Optional[str] = None,
+        save_freq: Optional[int] = None,
+        max_concurrency: Optional[int] = None,
     ) -> Dataset:
         """Execute the flow blocks in sequence to generate data.
 
@@ -340,6 +373,14 @@ class Flow(BaseModel):
                 "block_name": {"param1": value1, "param2": value2},
                 "other_block": {"param3": value3}
             }
+        checkpoint_dir : Optional[str], optional
+            Directory to save/load checkpoints. If provided, enables checkpointing.
+        save_freq : Optional[int], optional
+            Number of completed samples after which to save a checkpoint.
+            If None, only saves final results when checkpointing is enabled.
+        max_concurrency : Optional[int], optional
+            Maximum number of concurrent requests across all blocks.
+            Controls async request concurrency to prevent overwhelming servers.
 
         Returns
         -------
@@ -353,6 +394,26 @@ class Flow(BaseModel):
         FlowValidationError
             If flow validation fails or if model configuration is required but not set.
         """
+        # Validate save_freq parameter early to prevent range() errors
+        if save_freq is not None and save_freq <= 0:
+            raise FlowValidationError(
+                f"save_freq must be greater than 0, got {save_freq}"
+            )
+
+        # Validate max_concurrency parameter
+        if max_concurrency is not None:
+            # Explicitly reject boolean values (bool is a subclass of int in Python)
+            if isinstance(max_concurrency, bool) or not isinstance(
+                max_concurrency, int
+            ):
+                raise FlowValidationError(
+                    f"max_concurrency must be an int, got {type(max_concurrency).__name__}"
+                )
+            if max_concurrency <= 0:
+                raise FlowValidationError(
+                    f"max_concurrency must be greater than 0, got {max_concurrency}"
+                )
+
         # Validate preconditions
         if not self.blocks:
             raise FlowValidationError("Cannot generate with empty flow")
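
Both new generate() knobs are validated up front, before any block runs; note the explicit isinstance(max_concurrency, bool) check, since bool is a subclass of int in Python and would otherwise slip through. A sketch of the failure modes, assuming a loaded flow and a small placeholder dataset:

    from datasets import Dataset
    from sdg_hub.core.utils.error_handling import FlowValidationError

    ds = Dataset.from_dict({"document": ["a", "b"]})

    for bad in ({"save_freq": 0}, {"max_concurrency": True}, {"max_concurrency": -1}):
        try:
            flow.generate(ds, **bad)
        except FlowValidationError as err:
            print(err)  # each call is rejected before any processing starts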
@@ -360,6 +421,8 @@ class Flow(BaseModel):
         if len(dataset) == 0:
             raise EmptyDatasetError("Input dataset is empty")
 
+        validate_no_duplicates(dataset)
+
         # Check if model configuration has been set for flows with LLM blocks
         llm_blocks = self._detect_llm_blocks()
         if llm_blocks and not self._model_config_set:
@@ -376,18 +439,131 @@ class Flow(BaseModel):
                 "Dataset validation failed:\n" + "\n".join(dataset_errors)
             )
 
+        # Log concurrency control if specified
+        if max_concurrency is not None:
+            logger.info(f"Using max_concurrency={max_concurrency} for LLM requests")
+
+        # Initialize checkpointer if enabled
+        checkpointer = None
+        completed_dataset = None
+        if checkpoint_dir:
+            checkpointer = FlowCheckpointer(
+                checkpoint_dir=checkpoint_dir,
+                save_freq=save_freq,
+                flow_id=self.metadata.id,
+            )
+
+            # Load existing progress
+            remaining_dataset, completed_dataset = checkpointer.load_existing_progress(
+                dataset
+            )
+
+            if len(remaining_dataset) == 0:
+                logger.info("All samples already completed, returning existing results")
+                return completed_dataset
+
+            dataset = remaining_dataset
+            logger.info(f"Resuming with {len(dataset)} remaining samples")
+
         logger.info(
             f"Starting flow '{self.metadata.name}' v{self.metadata.version} "
             f"with {len(dataset)} samples across {len(self.blocks)} blocks"
+            + (f" (max_concurrency={max_concurrency})" if max_concurrency else "")
         )
 
-        current_dataset = dataset
         # Merge migrated runtime params with provided ones (provided ones take precedence)
         merged_runtime_params = self._migrated_runtime_params.copy()
         if runtime_params:
             merged_runtime_params.update(runtime_params)
         runtime_params = merged_runtime_params
 
+        # Process dataset in chunks if checkpointing with save_freq
+        if checkpointer and save_freq:
+            all_processed = []
+
+            # Process in chunks of save_freq
+            for i in range(0, len(dataset), save_freq):
+                chunk_end = min(i + save_freq, len(dataset))
+                chunk_dataset = dataset.select(range(i, chunk_end))
+
+                logger.info(
+                    f"Processing chunk {i // save_freq + 1}: samples {i} to {chunk_end - 1}"
+                )
+
+                # Execute all blocks on this chunk
+                processed_chunk = self._execute_blocks_on_dataset(
+                    chunk_dataset, runtime_params, max_concurrency
+                )
+                all_processed.append(processed_chunk)
+
+                # Save checkpoint after chunk completion
+                checkpointer.add_completed_samples(processed_chunk)
+
+            # Save final checkpoint for any remaining samples
+            checkpointer.save_final_checkpoint()
+
+            # Combine all processed chunks
+            final_dataset = safe_concatenate_with_validation(
+                all_processed, "processed chunks from flow execution"
+            )
+
+            # Combine with previously completed samples if any
+            if checkpointer and completed_dataset:
+                final_dataset = safe_concatenate_with_validation(
+                    [completed_dataset, final_dataset],
+                    "completed checkpoint data with newly processed data",
+                )
+
+        else:
+            # Process entire dataset at once
+            final_dataset = self._execute_blocks_on_dataset(
+                dataset, runtime_params, max_concurrency
+            )
+
+            # Save final checkpoint if checkpointing enabled
+            if checkpointer:
+                checkpointer.add_completed_samples(final_dataset)
+                checkpointer.save_final_checkpoint()
+
+                # Combine with previously completed samples if any
+                if completed_dataset:
+                    final_dataset = safe_concatenate_with_validation(
+                        [completed_dataset, final_dataset],
+                        "completed checkpoint data with newly processed data",
+                    )
+
+        logger.info(
+            f"Flow '{self.metadata.name}' completed successfully: "
+            f"{len(final_dataset)} final samples, "
+            f"{len(final_dataset.column_names)} final columns"
+        )
+
+        return final_dataset
+
+    def _execute_blocks_on_dataset(
+        self,
+        dataset: Dataset,
+        runtime_params: dict[str, dict[str, Any]],
+        max_concurrency: Optional[int] = None,
+    ) -> Dataset:
+        """Execute all blocks in sequence on the given dataset.
+
+        Parameters
+        ----------
+        dataset : Dataset
+            Dataset to process through all blocks.
+        runtime_params : Dict[str, Dict[str, Any]]
+            Runtime parameters for block execution.
+        max_concurrency : Optional[int], optional
+            Maximum concurrency for LLM requests across blocks.
+
+        Returns
+        -------
+        Dataset
+            Dataset after processing through all blocks.
+        """
+        current_dataset = dataset
+
         # Execute blocks in sequence
         for i, block in enumerate(self.blocks):
             logger.info(
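
The hunk above is the heart of the new checkpointing support: with save_freq set, the dataset is processed in save_freq-sized chunks, each completed chunk is handed to FlowCheckpointer (keyed by flow_id), and previously completed samples are merged back in via safe_concatenate_with_validation. A hedged usage sketch (directory name and sizes are illustrative):

    # Checkpoint every 50 completed samples; cap LLM concurrency at 8 requests
    result = flow.generate(
        dataset,
        checkpoint_dir="checkpoints/my_flow",
        save_freq=50,
        max_concurrency=8,
    )

    # Re-running the same call after an interruption resumes from the checkpoint
    # directory, processing only the remaining samples (or returning immediately
    # if everything was already completed).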
@@ -398,6 +574,10 @@ class Flow(BaseModel):
             # Prepare block execution parameters
             block_kwargs = self._prepare_block_kwargs(block, runtime_params)
 
+            # Add max_concurrency to block kwargs if provided
+            if max_concurrency is not None:
+                block_kwargs["_flow_max_concurrency"] = max_concurrency
+
             try:
                 # Check if this is a deprecated block and skip validations
                 is_deprecated_block = (
@@ -436,12 +616,6 @@ class Flow(BaseModel):
                     f"Block '{block.block_name}' execution failed: {exc}"
                 ) from exc
 
-        logger.info(
-            f"Flow '{self.metadata.name}' completed successfully: "
-            f"{len(current_dataset)} final samples, "
-            f"{len(current_dataset.column_names)} final columns"
-        )
-
         return current_dataset
 
     def _prepare_block_kwargs(
@@ -760,6 +934,8 @@ class Flow(BaseModel):
         if len(dataset) == 0:
             raise EmptyDatasetError("Input dataset is empty")
 
+        validate_no_duplicates(dataset)
+
         # Use smaller sample size if dataset is smaller
         actual_sample_size = min(sample_size, len(dataset))
 
@@ -784,9 +960,6 @@ class Flow(BaseModel):
             "execution_time_seconds": 0,
         }
 
-        # Standard
-        import time
-
         start_time = time.time()
 
         try:
@@ -930,6 +1103,186 @@ class Flow(BaseModel):
             "block_names": [block.block_name for block in self.blocks],
         }
 
+    def get_dataset_requirements(self) -> Optional[DatasetRequirements]:
+        """Get the dataset requirements for this flow.
+
+        Returns
+        -------
+        Optional[DatasetRequirements]
+            Dataset requirements object or None if not defined.
+
+        Examples
+        --------
+        >>> flow = Flow.from_yaml("path/to/flow.yaml")
+        >>> requirements = flow.get_dataset_requirements()
+        >>> if requirements:
+        ...     print(f"Required columns: {requirements.required_columns}")
+        """
+        return self.metadata.dataset_requirements
+
+    def get_dataset_schema(self) -> Dataset:
+        """Get an empty dataset with the correct schema for this flow.
+
+        Returns
+        -------
+        Dataset
+            Empty HuggingFace Dataset with the correct schema/features for this flow.
+            Users can add data to this dataset or use it to validate their own dataset schema.
+
+        Examples
+        --------
+        >>> flow = Flow.from_yaml("path/to/flow.yaml")
+        >>> schema_dataset = flow.get_dataset_schema()
+        >>>
+        >>> # Add your data
+        >>> schema_dataset = schema_dataset.add_item({
+        ...     "document": "Your document text",
+        ...     "domain": "Computer Science",
+        ...     "icl_document": "Example document"
+        ... })
+        >>>
+        >>> # Or validate your existing dataset schema
+        >>> my_dataset = Dataset.from_dict(my_data)
+        >>> if my_dataset.features == schema_dataset.features:
+        ...     print("Schema matches!")
+        """
+        requirements = self.get_dataset_requirements()
+
+        if requirements is None:
+            # Return empty dataset with no schema requirements
+            return Dataset.from_dict({})
+
+        # Build schema features
+        schema_features = {}
+
+        # Process required columns
+        for col_name in requirements.required_columns:
+            col_type = requirements.column_types.get(col_name, "string")
+            schema_features[col_name] = self._map_column_type_to_feature(col_type)
+
+        # Process optional columns
+        for col_name in requirements.optional_columns:
+            col_type = requirements.column_types.get(col_name, "string")
+            schema_features[col_name] = self._map_column_type_to_feature(col_type)
+
+        # Create empty dataset with the correct features
+        features = datasets.Features(schema_features)
+        empty_data = {col_name: [] for col_name in schema_features.keys()}
+
+        return Dataset.from_dict(empty_data, features=features)
+
+    def _map_column_type_to_feature(self, col_type: str):
+        """Map column type string to HuggingFace feature type."""
+        # Map common type names to HuggingFace types
+        if col_type in ["str", "string", "text"]:
+            return datasets.Value("string")
+        elif col_type in ["int", "integer"]:
+            return datasets.Value("int64")
+        elif col_type in ["float", "number"]:
+            return datasets.Value("float64")
+        elif col_type in ["bool", "boolean"]:
+            return datasets.Value("bool")
+        else:
+            # Default to string for unknown types
+            return datasets.Value("string")
+
+    def print_info(self) -> None:
+        """
+        Print an interactive summary of the Flow in the console.
+
+        The summary contains:
+        1. Flow metadata (name, version, author, description)
+        2. Defined runtime parameters with type hints and defaults
+        3. A table of all blocks with their input and output columns
+
+        Notes
+        -----
+        Uses the `rich` library for colourised output; install with
+        `pip install rich` if not already present.
+
+        Returns
+        -------
+        None
+        """
+        console = Console()
+
+        # Create main tree structure
+        flow_tree = Tree(
+            f"[bold bright_blue]{self.metadata.name}[/bold bright_blue] Flow"
+        )
+
+        # Metadata section
+        metadata_branch = flow_tree.add(
+            "[bold bright_green]Metadata[/bold bright_green]"
+        )
+        metadata_branch.add(
+            f"Version: [bright_cyan]{self.metadata.version}[/bright_cyan]"
+        )
+        metadata_branch.add(
+            f"Author: [bright_cyan]{self.metadata.author}[/bright_cyan]"
+        )
+        if self.metadata.description:
+            metadata_branch.add(
+                f"Description: [white]{self.metadata.description}[/white]"
+            )
+
+        # Parameters section
+        if self.parameters:
+            params_branch = flow_tree.add(
+                "[bold bright_yellow]Parameters[/bold bright_yellow]"
+            )
+            for name, param in self.parameters.items():
+                param_info = f"[bright_cyan]{name}[/bright_cyan]: [white]{param.type_hint}[/white]"
+                if param.default is not None:
+                    param_info += f" = [bright_white]{param.default}[/bright_white]"
+                params_branch.add(param_info)
+
+        # Blocks overview
+        flow_tree.add(
+            f"[bold bright_magenta]Blocks[/bold bright_magenta] ({len(self.blocks)} total)"
+        )
+
+        # Create blocks table
+        blocks_table = Table(show_header=True, header_style="bold bright_white")
+        blocks_table.add_column("Block Name", style="bright_cyan")
+        blocks_table.add_column("Type", style="bright_green")
+        blocks_table.add_column("Input Cols", style="bright_yellow")
+        blocks_table.add_column("Output Cols", style="bright_red")
+
+        for block in self.blocks:
+            input_cols = getattr(block, "input_cols", None)
+            output_cols = getattr(block, "output_cols", None)
+
+            blocks_table.add_row(
+                block.block_name,
+                block.__class__.__name__,
+                str(input_cols) if input_cols else "[bright_black]None[/bright_black]",
+                str(output_cols)
+                if output_cols
+                else "[bright_black]None[/bright_black]",
+            )
+
+        # Print everything
+        console.print()
+        console.print(
+            Panel(
+                flow_tree,
+                title="[bold bright_white]Flow Information[/bold bright_white]",
+                border_style="bright_blue",
+            )
+        )
+        console.print()
+        console.print(
+            Panel(
+                blocks_table,
+                title="[bold bright_white]Block Details[/bold bright_white]",
+                border_style="bright_magenta",
+            )
+        )
+        console.print()
+
     def to_yaml(self, output_path: str) -> None:
         """Save flow configuration to YAML file.
 
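
The new inspection helpers let callers validate inputs before spending any LLM calls: get_dataset_schema() returns an empty Dataset carrying the expected features, and print_info() renders the flow's metadata, parameters, and block table with rich. A small sketch, assuming my_dataset is the caller's own Dataset:

    schema = flow.get_dataset_schema()   # empty Dataset with the expected features
    flow.print_info()                    # rich-rendered summary in the console

    if my_dataset.features != schema.features:
        print("Input schema does not match what the flow expects")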
@@ -952,10 +1305,7 @@ class Flow(BaseModel):
                 name: param.model_dump() for name, param in self.parameters.items()
             }
 
-
-            yaml.dump(config, f, default_flow_style=False, sort_keys=False)
-
-        logger.info(f"Flow configuration saved to: {output_path}")
+        save_flow_yaml(output_path, config)
 
     def __len__(self) -> int:
         """Number of blocks in the flow."""
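
With to_yaml now delegating to save_flow_yaml, every write path for flow configs goes through the same helper introduced in sdg_hub/core/utils/yaml_utils.py. A closing round-trip sketch under the same assumptions as above (file names illustrative):

    flow = Flow.from_yaml("my_flow.yaml")
    flow.to_yaml("my_flow_copy.yaml")  # written via save_flow_yaml
    print(len(flow))                   # __len__ reports the number of blocks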