sdg-hub 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +2 -2
- sdg_hub/core/blocks/__init__.py +0 -22
- sdg_hub/core/blocks/transform/rename_columns.py +19 -0
- sdg_hub/core/flow/base.py +8 -80
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +5 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +5 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +5 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +6 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +5 -1
- {sdg_hub-0.4.2.dist-info → sdg_hub-0.5.0.dist-info}/METADATA +2 -2
- {sdg_hub-0.4.2.dist-info → sdg_hub-0.5.0.dist-info}/RECORD +14 -25
- sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -29
- sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -93
- sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -88
- sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -103
- sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -94
- sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -479
- sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -88
- sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -58
- sdg_hub/core/blocks/deprecated_blocks/selector.py +0 -97
- sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -88
- sdg_hub/core/flow/migration.py +0 -198
- {sdg_hub-0.4.2.dist-info → sdg_hub-0.5.0.dist-info}/WHEEL +0 -0
- {sdg_hub-0.4.2.dist-info → sdg_hub-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.4.2.dist-info → sdg_hub-0.5.0.dist-info}/top_level.txt +0 -0
@@ -1,88 +0,0 @@
|
|
1
|
-
# SPDX-License-Identifier: Apache-2.0
|
2
|
-
"""Deprecated RenameColumns for backwards compatibility.
|
3
|
-
|
4
|
-
This module provides a deprecated wrapper around RenameColumnsBlock
|
5
|
-
to maintain backwards compatibility with existing code and configurations.
|
6
|
-
"""
|
7
|
-
|
8
|
-
# Standard
|
9
|
-
from typing import Any
|
10
|
-
import warnings
|
11
|
-
|
12
|
-
# Third Party
|
13
|
-
from datasets import Dataset
|
14
|
-
|
15
|
-
# Local
|
16
|
-
from ...utils.logger_config import setup_logger
|
17
|
-
from ..base import BaseBlock
|
18
|
-
from ..registry import BlockRegistry
|
19
|
-
from ..transform import RenameColumnsBlock
|
20
|
-
|
21
|
-
logger = setup_logger(__name__)
|
22
|
-
|
23
|
-
|
24
|
-
@BlockRegistry.register(
|
25
|
-
"RenameColumns",
|
26
|
-
"deprecated",
|
27
|
-
"DEPRECATED: Use RenameColumnsBlock instead. Renames columns in a dataset according to a mapping dictionary",
|
28
|
-
)
|
29
|
-
class RenameColumns(BaseBlock):
|
30
|
-
"""DEPRECATED: Block for renaming columns in a dataset.
|
31
|
-
|
32
|
-
This block is deprecated and maintained only for backwards compatibility.
|
33
|
-
Please use RenameColumnsBlock instead.
|
34
|
-
|
35
|
-
This block renames columns in a dataset according to a mapping dictionary,
|
36
|
-
where keys are existing column names and values are new column names.
|
37
|
-
"""
|
38
|
-
|
39
|
-
def __init__(
|
40
|
-
self,
|
41
|
-
block_name: str,
|
42
|
-
columns_map: dict[str, str],
|
43
|
-
) -> None:
|
44
|
-
"""Initialize the deprecated RenameColumns.
|
45
|
-
|
46
|
-
Parameters
|
47
|
-
----------
|
48
|
-
block_name : str
|
49
|
-
Name of the block.
|
50
|
-
columns_map : Dict[str, str]
|
51
|
-
Dictionary mapping existing column names to new column names.
|
52
|
-
Keys are existing column names, values are new column names.
|
53
|
-
"""
|
54
|
-
# Issue deprecation warning
|
55
|
-
warnings.warn(
|
56
|
-
"RenameColumns is deprecated and will be removed in a future version. "
|
57
|
-
"Please use RenameColumnsBlock instead.",
|
58
|
-
DeprecationWarning,
|
59
|
-
stacklevel=2,
|
60
|
-
)
|
61
|
-
|
62
|
-
# Map old signature to new signature
|
63
|
-
super().__init__(
|
64
|
-
block_name=block_name,
|
65
|
-
input_cols=columns_map,
|
66
|
-
output_cols=[],
|
67
|
-
)
|
68
|
-
|
69
|
-
# Create the new block instance with mapped parameters
|
70
|
-
self._new_block = RenameColumnsBlock(
|
71
|
-
block_name=block_name,
|
72
|
-
input_cols=columns_map,
|
73
|
-
)
|
74
|
-
|
75
|
-
def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
|
76
|
-
"""Generate dataset with renamed columns using the new RenameColumnsBlock.
|
77
|
-
|
78
|
-
Parameters
|
79
|
-
----------
|
80
|
-
samples : Dataset
|
81
|
-
The input dataset to rename columns in.
|
82
|
-
|
83
|
-
Returns
|
84
|
-
-------
|
85
|
-
Dataset
|
86
|
-
The dataset with renamed columns.
|
87
|
-
"""
|
88
|
-
return self._new_block.generate(samples, **kwargs)
|
@@ -1,58 +0,0 @@
|
|
1
|
-
# SPDX-License-Identifier: Apache-2.0
|
2
|
-
"""DEPRECATED: SamplePopulatorBlock for backward compatibility.
|
3
|
-
|
4
|
-
This module provides a deprecated stub for SamplePopulatorBlock.
|
5
|
-
This block is deprecated and will be replaced with a router block.
|
6
|
-
"""
|
7
|
-
|
8
|
-
# Standard
|
9
|
-
from typing import Any
|
10
|
-
import warnings
|
11
|
-
|
12
|
-
# Third Party
|
13
|
-
from datasets import Dataset
|
14
|
-
|
15
|
-
# Local
|
16
|
-
from ...utils.logger_config import setup_logger
|
17
|
-
from ..base import BaseBlock
|
18
|
-
from ..registry import BlockRegistry
|
19
|
-
|
20
|
-
logger = setup_logger(__name__)
|
21
|
-
|
22
|
-
|
23
|
-
@BlockRegistry.register(
|
24
|
-
"SamplePopulatorBlock",
|
25
|
-
"deprecated",
|
26
|
-
"DEPRECATED: Use a router block instead. Populates dataset with data from configuration files",
|
27
|
-
)
|
28
|
-
class SamplePopulatorBlock(BaseBlock):
|
29
|
-
"""DEPRECATED: Block for populating dataset with data from configuration files.
|
30
|
-
|
31
|
-
.. deprecated::
|
32
|
-
This block is deprecated and will be replaced with a router block.
|
33
|
-
"""
|
34
|
-
|
35
|
-
def __init__(
|
36
|
-
self,
|
37
|
-
block_name: str,
|
38
|
-
config_paths: list[str],
|
39
|
-
column_name: str,
|
40
|
-
post_fix: str = "",
|
41
|
-
**batch_kwargs: dict[str, Any],
|
42
|
-
) -> None:
|
43
|
-
warnings.warn(
|
44
|
-
"SamplePopulatorBlock is deprecated and will be replaced with a router block.",
|
45
|
-
DeprecationWarning,
|
46
|
-
stacklevel=2,
|
47
|
-
)
|
48
|
-
|
49
|
-
# Initialize with dummy values for BaseBlock validation
|
50
|
-
super().__init__(
|
51
|
-
block_name=block_name, input_cols=[column_name], output_cols=[column_name]
|
52
|
-
)
|
53
|
-
|
54
|
-
def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
|
55
|
-
"""Generate method - raises error as block is deprecated."""
|
56
|
-
raise NotImplementedError(
|
57
|
-
"SamplePopulatorBlock is deprecated and will be replaced with a router block."
|
58
|
-
)
|
@@ -1,97 +0,0 @@
|
|
1
|
-
# SPDX-License-Identifier: Apache-2.0
|
2
|
-
"""DEPRECATED: SelectorBlock for backward compatibility.
|
3
|
-
|
4
|
-
This module provides a deprecated wrapper for the old SelectorBlock interface.
|
5
|
-
Use transform.IndexBasedMapperBlock instead.
|
6
|
-
"""
|
7
|
-
|
8
|
-
# Standard
|
9
|
-
from typing import Any
|
10
|
-
import warnings
|
11
|
-
|
12
|
-
# Third Party
|
13
|
-
from datasets import Dataset
|
14
|
-
|
15
|
-
# Local
|
16
|
-
from ...utils.logger_config import setup_logger
|
17
|
-
from ..base import BaseBlock
|
18
|
-
from ..registry import BlockRegistry
|
19
|
-
from ..transform.index_based_mapper import IndexBasedMapperBlock
|
20
|
-
|
21
|
-
logger = setup_logger(__name__)
|
22
|
-
|
23
|
-
|
24
|
-
@BlockRegistry.register(
|
25
|
-
"SelectorBlock",
|
26
|
-
"deprecated",
|
27
|
-
"DEPRECATED: Use IndexBasedMapperBlock instead. Selects and maps values from one column to another",
|
28
|
-
)
|
29
|
-
class SelectorBlock(BaseBlock):
|
30
|
-
"""DEPRECATED: Block for selecting and mapping values from one column to another.
|
31
|
-
|
32
|
-
.. deprecated::
|
33
|
-
Use `sdg_hub.blocks.transform.IndexBasedMapperBlock` instead.
|
34
|
-
This class will be removed in a future version.
|
35
|
-
|
36
|
-
This block uses a mapping dictionary to select values from one column and
|
37
|
-
store them in a new output column based on a choice column's value.
|
38
|
-
|
39
|
-
Parameters
|
40
|
-
----------
|
41
|
-
block_name : str
|
42
|
-
Name of the block.
|
43
|
-
choice_map : Dict[str, str]
|
44
|
-
Dictionary mapping choice values to column names.
|
45
|
-
choice_col : str
|
46
|
-
Name of the column containing choice values.
|
47
|
-
output_col : str
|
48
|
-
Name of the column to store selected values.
|
49
|
-
**batch_kwargs : Dict[str, Any]
|
50
|
-
Additional keyword arguments for batch processing.
|
51
|
-
"""
|
52
|
-
|
53
|
-
def __init__(
|
54
|
-
self,
|
55
|
-
block_name: str,
|
56
|
-
choice_map: dict[str, str],
|
57
|
-
choice_col: str,
|
58
|
-
output_col: str,
|
59
|
-
**batch_kwargs: dict[str, Any],
|
60
|
-
) -> None:
|
61
|
-
warnings.warn(
|
62
|
-
"SelectorBlock is deprecated. Use sdg_hub.blocks.transform.IndexBasedMapperBlock instead.",
|
63
|
-
DeprecationWarning,
|
64
|
-
stacklevel=2,
|
65
|
-
)
|
66
|
-
|
67
|
-
# Initialize with dummy values for BaseBlock validation
|
68
|
-
# We need all columns referenced in choice_map as input, plus the choice column
|
69
|
-
all_input_cols = list(choice_map.values()) + [choice_col]
|
70
|
-
|
71
|
-
super().__init__(
|
72
|
-
block_name=block_name, input_cols=all_input_cols, output_cols=[output_col]
|
73
|
-
)
|
74
|
-
|
75
|
-
# Create the new implementation
|
76
|
-
self._impl = IndexBasedMapperBlock(
|
77
|
-
block_name=block_name,
|
78
|
-
input_cols=all_input_cols,
|
79
|
-
output_cols=[output_col],
|
80
|
-
choice_map=choice_map,
|
81
|
-
choice_cols=[choice_col],
|
82
|
-
)
|
83
|
-
|
84
|
-
def generate(self, samples: Dataset, **kwargs) -> Dataset:
|
85
|
-
"""Generate a new dataset with selected values.
|
86
|
-
|
87
|
-
Parameters
|
88
|
-
----------
|
89
|
-
samples : Dataset
|
90
|
-
Input dataset to process.
|
91
|
-
|
92
|
-
Returns
|
93
|
-
-------
|
94
|
-
Dataset
|
95
|
-
Dataset with selected values stored in output column.
|
96
|
-
"""
|
97
|
-
return self._impl.generate(samples)
|
@@ -1,88 +0,0 @@
|
|
1
|
-
# SPDX-License-Identifier: Apache-2.0
|
2
|
-
"""Deprecated SetToMajorityValue for backwards compatibility.
|
3
|
-
|
4
|
-
This module provides a deprecated wrapper around UniformColumnValueSetter
|
5
|
-
to maintain backwards compatibility with existing code and configurations.
|
6
|
-
"""
|
7
|
-
|
8
|
-
# Standard
|
9
|
-
from typing import Any
|
10
|
-
import warnings
|
11
|
-
|
12
|
-
# Third Party
|
13
|
-
from datasets import Dataset
|
14
|
-
|
15
|
-
# Local
|
16
|
-
from ...utils.logger_config import setup_logger
|
17
|
-
from ..base import BaseBlock
|
18
|
-
from ..registry import BlockRegistry
|
19
|
-
from ..transform import UniformColumnValueSetter
|
20
|
-
|
21
|
-
logger = setup_logger(__name__)
|
22
|
-
|
23
|
-
|
24
|
-
@BlockRegistry.register(
|
25
|
-
"SetToMajorityValue",
|
26
|
-
"deprecated",
|
27
|
-
"DEPRECATED: Use UniformColumnValueSetter with reduction_strategy='mode' instead. Sets all values in a column to the most frequent value",
|
28
|
-
)
|
29
|
-
class SetToMajorityValue(BaseBlock):
|
30
|
-
"""DEPRECATED: Block for setting all values in a column to the most frequent value.
|
31
|
-
|
32
|
-
This block is deprecated and maintained only for backwards compatibility.
|
33
|
-
Please use UniformColumnValueSetter with reduction_strategy='mode' instead.
|
34
|
-
|
35
|
-
This block finds the most common value (mode) in a specified column and
|
36
|
-
replaces all values in that column with this majority value.
|
37
|
-
"""
|
38
|
-
|
39
|
-
def __init__(
|
40
|
-
self,
|
41
|
-
block_name: str,
|
42
|
-
col_name: str,
|
43
|
-
) -> None:
|
44
|
-
"""Initialize the deprecated SetToMajorityValue.
|
45
|
-
|
46
|
-
Parameters
|
47
|
-
----------
|
48
|
-
block_name : str
|
49
|
-
Name of the block.
|
50
|
-
col_name : str
|
51
|
-
Name of the column to set to majority value.
|
52
|
-
"""
|
53
|
-
# Issue deprecation warning
|
54
|
-
warnings.warn(
|
55
|
-
"SetToMajorityValue is deprecated and will be removed in a future version. "
|
56
|
-
"Please use UniformColumnValueSetter with reduction_strategy='mode' instead.",
|
57
|
-
DeprecationWarning,
|
58
|
-
stacklevel=2,
|
59
|
-
)
|
60
|
-
|
61
|
-
# Map old signature to new signature
|
62
|
-
super().__init__(
|
63
|
-
block_name=block_name,
|
64
|
-
input_cols=[col_name],
|
65
|
-
output_cols=[],
|
66
|
-
)
|
67
|
-
|
68
|
-
# Create the new block instance with mapped parameters
|
69
|
-
self._new_block = UniformColumnValueSetter(
|
70
|
-
block_name=block_name,
|
71
|
-
input_cols=[col_name],
|
72
|
-
reduction_strategy="mode",
|
73
|
-
)
|
74
|
-
|
75
|
-
def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
|
76
|
-
"""Generate dataset with column set to majority value using UniformColumnValueSetter.
|
77
|
-
|
78
|
-
Parameters
|
79
|
-
----------
|
80
|
-
samples : Dataset
|
81
|
-
The input dataset to process.
|
82
|
-
|
83
|
-
Returns
|
84
|
-
-------
|
85
|
-
Dataset
|
86
|
-
The dataset with specified column set to its majority value.
|
87
|
-
"""
|
88
|
-
return self._new_block.generate(samples, **kwargs)
|
sdg_hub/core/flow/migration.py
DELETED
@@ -1,198 +0,0 @@
|
|
1
|
-
# SPDX-License-Identifier: Apache-2.0
|
2
|
-
"""Migration utilities for backward compatibility with old flow formats."""
|
3
|
-
|
4
|
-
# Standard
|
5
|
-
from pathlib import Path
|
6
|
-
from typing import Any, Union
|
7
|
-
|
8
|
-
# Local
|
9
|
-
from ..utils.logger_config import setup_logger
|
10
|
-
|
11
|
-
logger = setup_logger(__name__)
|
12
|
-
|
13
|
-
|
14
|
-
class FlowMigration:
|
15
|
-
"""Utility class for migrating old flow formats to new format."""
|
16
|
-
|
17
|
-
@staticmethod
|
18
|
-
def is_old_format(flow_config: Union[list[dict[str, Any]], dict[str, Any]]) -> bool:
|
19
|
-
"""Detect if a flow configuration is in the old format.
|
20
|
-
|
21
|
-
Parameters
|
22
|
-
----------
|
23
|
-
flow_config : Union[List[Dict[str, Any]], Dict[str, Any]]
|
24
|
-
The loaded YAML configuration.
|
25
|
-
|
26
|
-
Returns
|
27
|
-
-------
|
28
|
-
bool
|
29
|
-
True if the configuration is in old format, False otherwise.
|
30
|
-
"""
|
31
|
-
# Old format: Direct array of blocks
|
32
|
-
# New format: Dictionary with 'metadata' and 'blocks' keys
|
33
|
-
if isinstance(flow_config, list):
|
34
|
-
return True
|
35
|
-
|
36
|
-
if isinstance(flow_config, dict):
|
37
|
-
# Check if it has the new format structure
|
38
|
-
has_metadata = "metadata" in flow_config
|
39
|
-
has_blocks = "blocks" in flow_config
|
40
|
-
|
41
|
-
# If it has both metadata and blocks, it's new format
|
42
|
-
if has_metadata and has_blocks:
|
43
|
-
return False
|
44
|
-
|
45
|
-
# If it doesn't have the expected new format structure but is a dict,
|
46
|
-
# check if it looks like old format (all keys are block configs)
|
47
|
-
if not has_metadata and not has_blocks:
|
48
|
-
# Check first few items to see if they look like old block configs
|
49
|
-
for value in flow_config.values():
|
50
|
-
if isinstance(value, dict) and "block_type" in value:
|
51
|
-
return True
|
52
|
-
# If it's a dict but doesn't look like blocks, assume new format
|
53
|
-
return False
|
54
|
-
|
55
|
-
# If we can't determine, assume new format
|
56
|
-
return False
|
57
|
-
|
58
|
-
@staticmethod
|
59
|
-
def migrate_to_new_format(
|
60
|
-
flow_config: list[dict[str, Any]], yaml_path: str
|
61
|
-
) -> tuple[dict[str, Any], dict[str, dict[str, Any]]]:
|
62
|
-
"""Migrate old format flow configuration to new format.
|
63
|
-
|
64
|
-
Parameters
|
65
|
-
----------
|
66
|
-
flow_config : List[Dict[str, Any]]
|
67
|
-
Old format flow configuration (array of blocks).
|
68
|
-
yaml_path : str
|
69
|
-
Path to the original YAML file for generating metadata.
|
70
|
-
|
71
|
-
Returns
|
72
|
-
-------
|
73
|
-
tuple[Dict[str, Any], Dict[str, Dict[str, Any]]]
|
74
|
-
Tuple of (new format flow configuration, extracted runtime_params).
|
75
|
-
"""
|
76
|
-
logger.info(f"Migrating old flow format from: {yaml_path}")
|
77
|
-
|
78
|
-
# Generate default metadata
|
79
|
-
flow_name = Path(yaml_path).stem
|
80
|
-
metadata = FlowMigration._generate_default_metadata(flow_name)
|
81
|
-
|
82
|
-
# Process blocks and extract runtime parameters
|
83
|
-
migrated_blocks = []
|
84
|
-
runtime_params = {}
|
85
|
-
|
86
|
-
for i, block_config in enumerate(flow_config):
|
87
|
-
try:
|
88
|
-
migrated_block, block_runtime_params = (
|
89
|
-
FlowMigration._migrate_block_config(block_config)
|
90
|
-
)
|
91
|
-
migrated_blocks.append(migrated_block)
|
92
|
-
|
93
|
-
# Add block's runtime params if any
|
94
|
-
if block_runtime_params:
|
95
|
-
block_name = migrated_block.get("block_config", {}).get(
|
96
|
-
"block_name"
|
97
|
-
)
|
98
|
-
if block_name:
|
99
|
-
runtime_params[block_name] = block_runtime_params
|
100
|
-
|
101
|
-
except Exception as exc:
|
102
|
-
logger.warning(f"Failed to migrate block at index {i}: {exc}")
|
103
|
-
# Keep original block config as fallback
|
104
|
-
migrated_blocks.append(block_config)
|
105
|
-
|
106
|
-
# Create new format structure
|
107
|
-
new_config = {"metadata": metadata, "blocks": migrated_blocks}
|
108
|
-
|
109
|
-
logger.info(f"Successfully migrated flow with {len(migrated_blocks)} blocks")
|
110
|
-
logger.info(f"Extracted runtime_params for {len(runtime_params)} blocks")
|
111
|
-
|
112
|
-
return new_config, runtime_params
|
113
|
-
|
114
|
-
@staticmethod
|
115
|
-
def _generate_default_metadata(flow_name: str) -> dict[str, Any]:
|
116
|
-
"""Generate default metadata for migrated flows."""
|
117
|
-
# Import here to avoid circular import
|
118
|
-
from ..utils.flow_identifier import get_flow_identifier
|
119
|
-
|
120
|
-
metadata = {
|
121
|
-
"name": flow_name,
|
122
|
-
"description": f"Migrated flow: {flow_name}",
|
123
|
-
"version": "1.0.0",
|
124
|
-
"author": "SDG_Hub",
|
125
|
-
"tags": ["migrated"],
|
126
|
-
"recommended_models": {
|
127
|
-
"default": "meta-llama/Llama-3.3-70B-Instruct",
|
128
|
-
"compatible": [],
|
129
|
-
"experimental": [],
|
130
|
-
},
|
131
|
-
}
|
132
|
-
|
133
|
-
# Generate id for migrated flows
|
134
|
-
flow_id = get_flow_identifier(flow_name)
|
135
|
-
if flow_id:
|
136
|
-
metadata["id"] = flow_id
|
137
|
-
logger.debug(f"Generated id for migrated flow: {flow_id}")
|
138
|
-
|
139
|
-
return metadata
|
140
|
-
|
141
|
-
@staticmethod
|
142
|
-
def _migrate_block_config(
|
143
|
-
block_config: dict[str, Any],
|
144
|
-
) -> tuple[dict[str, Any], dict[str, Any]]:
|
145
|
-
"""Migrate individual block configuration from old to new format.
|
146
|
-
|
147
|
-
Parameters
|
148
|
-
----------
|
149
|
-
block_config : Dict[str, Any]
|
150
|
-
Old format block configuration.
|
151
|
-
|
152
|
-
Returns
|
153
|
-
-------
|
154
|
-
tuple[Dict[str, Any], Dict[str, Any]]
|
155
|
-
Tuple of (migrated block configuration, extracted runtime_params).
|
156
|
-
"""
|
157
|
-
if not isinstance(block_config, dict):
|
158
|
-
return block_config, {}
|
159
|
-
|
160
|
-
# Start with the original config
|
161
|
-
migrated_config = block_config.copy()
|
162
|
-
runtime_params = {}
|
163
|
-
|
164
|
-
# Extract gen_kwargs as runtime_params
|
165
|
-
if "gen_kwargs" in migrated_config:
|
166
|
-
runtime_params = migrated_config.pop("gen_kwargs")
|
167
|
-
logger.debug(f"Extracted gen_kwargs as runtime_params: {runtime_params}")
|
168
|
-
|
169
|
-
# Remove unsupported fields
|
170
|
-
for unsupported_field in ["drop_columns", "drop_duplicates", "batch_kwargs"]:
|
171
|
-
if unsupported_field in migrated_config:
|
172
|
-
migrated_config.pop(unsupported_field)
|
173
|
-
logger.debug(
|
174
|
-
f"Ignoring {unsupported_field} as it's not supported in new flow format"
|
175
|
-
)
|
176
|
-
|
177
|
-
# Handle parser_kwargs for LLMBlock (keep in block_config)
|
178
|
-
if migrated_config.get("block_type") == "LLMBlock":
|
179
|
-
block_config_section = migrated_config.get("block_config", {})
|
180
|
-
if "parser_kwargs" in block_config_section:
|
181
|
-
parser_kwargs = block_config_section["parser_kwargs"]
|
182
|
-
logger.debug(f"Preserving parser_kwargs for LLMBlock: {parser_kwargs}")
|
183
|
-
|
184
|
-
# Handle operator string conversion for FilterByValueBlock
|
185
|
-
if migrated_config.get("block_type") == "FilterByValueBlock":
|
186
|
-
block_config_section = migrated_config.get("block_config", {})
|
187
|
-
if "operation" in block_config_section:
|
188
|
-
operation = block_config_section["operation"]
|
189
|
-
if isinstance(operation, str) and operation.startswith("operator."):
|
190
|
-
# Convert "operator.eq" to "eq"
|
191
|
-
block_config_section["operation"] = operation.replace(
|
192
|
-
"operator.", ""
|
193
|
-
)
|
194
|
-
logger.debug(
|
195
|
-
f"Converted operation from {operation} to {block_config_section['operation']}"
|
196
|
-
)
|
197
|
-
|
198
|
-
return migrated_config, runtime_params
|
File without changes
|
File without changes
|
File without changes
|