sdg-hub 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. sdg_hub/__init__.py +28 -1
  2. sdg_hub/_version.py +2 -2
  3. sdg_hub/core/__init__.py +22 -0
  4. sdg_hub/core/blocks/__init__.py +58 -0
  5. sdg_hub/core/blocks/base.py +313 -0
  6. sdg_hub/core/blocks/deprecated_blocks/__init__.py +29 -0
  7. sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +93 -0
  8. sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +88 -0
  9. sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +103 -0
  10. sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +94 -0
  11. sdg_hub/core/blocks/deprecated_blocks/llmblock.py +479 -0
  12. sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +88 -0
  13. sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +58 -0
  14. sdg_hub/core/blocks/deprecated_blocks/selector.py +97 -0
  15. sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +88 -0
  16. sdg_hub/core/blocks/evaluation/__init__.py +9 -0
  17. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +564 -0
  18. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +564 -0
  19. sdg_hub/core/blocks/evaluation/verify_question_block.py +564 -0
  20. sdg_hub/core/blocks/filtering/__init__.py +12 -0
  21. sdg_hub/core/blocks/filtering/column_value_filter.py +188 -0
  22. sdg_hub/core/blocks/llm/__init__.py +25 -0
  23. sdg_hub/core/blocks/llm/client_manager.py +398 -0
  24. sdg_hub/core/blocks/llm/config.py +336 -0
  25. sdg_hub/core/blocks/llm/error_handler.py +368 -0
  26. sdg_hub/core/blocks/llm/llm_chat_block.py +542 -0
  27. sdg_hub/core/blocks/llm/prompt_builder_block.py +368 -0
  28. sdg_hub/core/blocks/llm/text_parser_block.py +310 -0
  29. sdg_hub/core/blocks/registry.py +331 -0
  30. sdg_hub/core/blocks/transform/__init__.py +23 -0
  31. sdg_hub/core/blocks/transform/duplicate_columns.py +88 -0
  32. sdg_hub/core/blocks/transform/index_based_mapper.py +225 -0
  33. sdg_hub/core/blocks/transform/melt_columns.py +126 -0
  34. sdg_hub/core/blocks/transform/rename_columns.py +69 -0
  35. sdg_hub/core/blocks/transform/text_concat.py +102 -0
  36. sdg_hub/core/blocks/transform/uniform_col_val_setter.py +101 -0
  37. sdg_hub/core/flow/__init__.py +20 -0
  38. sdg_hub/core/flow/base.py +980 -0
  39. sdg_hub/core/flow/metadata.py +344 -0
  40. sdg_hub/core/flow/migration.py +187 -0
  41. sdg_hub/core/flow/registry.py +330 -0
  42. sdg_hub/core/flow/validation.py +265 -0
  43. sdg_hub/{utils → core/utils}/__init__.py +6 -4
  44. sdg_hub/{utils → core/utils}/datautils.py +1 -3
  45. sdg_hub/core/utils/error_handling.py +208 -0
  46. sdg_hub/{utils → core/utils}/path_resolution.py +2 -2
  47. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +40 -0
  48. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +13 -0
  49. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +64 -0
  50. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +29 -0
  51. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +81 -0
  52. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +13 -0
  53. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +191 -0
  54. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +54 -0
  55. sdg_hub-0.2.0.dist-info/METADATA +218 -0
  56. sdg_hub-0.2.0.dist-info/RECORD +63 -0
  57. sdg_hub/blocks/__init__.py +0 -42
  58. sdg_hub/blocks/block.py +0 -96
  59. sdg_hub/blocks/llmblock.py +0 -375
  60. sdg_hub/blocks/openaichatblock.py +0 -556
  61. sdg_hub/blocks/utilblocks.py +0 -597
  62. sdg_hub/checkpointer.py +0 -139
  63. sdg_hub/configs/annotations/cot_reflection.yaml +0 -34
  64. sdg_hub/configs/annotations/detailed_annotations.yaml +0 -28
  65. sdg_hub/configs/annotations/detailed_description.yaml +0 -10
  66. sdg_hub/configs/annotations/detailed_description_icl.yaml +0 -32
  67. sdg_hub/configs/annotations/simple_annotations.yaml +0 -9
  68. sdg_hub/configs/knowledge/__init__.py +0 -0
  69. sdg_hub/configs/knowledge/atomic_facts.yaml +0 -46
  70. sdg_hub/configs/knowledge/auxilary_instructions.yaml +0 -35
  71. sdg_hub/configs/knowledge/detailed_summary.yaml +0 -18
  72. sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +0 -68
  73. sdg_hub/configs/knowledge/evaluate_question.yaml +0 -38
  74. sdg_hub/configs/knowledge/evaluate_relevancy.yaml +0 -84
  75. sdg_hub/configs/knowledge/extractive_summary.yaml +0 -18
  76. sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +0 -39
  77. sdg_hub/configs/knowledge/generate_questions.yaml +0 -82
  78. sdg_hub/configs/knowledge/generate_questions_responses.yaml +0 -56
  79. sdg_hub/configs/knowledge/generate_responses.yaml +0 -86
  80. sdg_hub/configs/knowledge/mcq_generation.yaml +0 -83
  81. sdg_hub/configs/knowledge/router.yaml +0 -12
  82. sdg_hub/configs/knowledge/simple_generate_qa.yaml +0 -34
  83. sdg_hub/configs/reasoning/__init__.py +0 -0
  84. sdg_hub/configs/reasoning/dynamic_cot.yaml +0 -40
  85. sdg_hub/configs/skills/__init__.py +0 -0
  86. sdg_hub/configs/skills/analyzer.yaml +0 -48
  87. sdg_hub/configs/skills/annotation.yaml +0 -36
  88. sdg_hub/configs/skills/contexts.yaml +0 -28
  89. sdg_hub/configs/skills/critic.yaml +0 -60
  90. sdg_hub/configs/skills/evaluate_freeform_pair.yaml +0 -111
  91. sdg_hub/configs/skills/evaluate_freeform_questions.yaml +0 -78
  92. sdg_hub/configs/skills/evaluate_grounded_pair.yaml +0 -119
  93. sdg_hub/configs/skills/evaluate_grounded_questions.yaml +0 -51
  94. sdg_hub/configs/skills/freeform_questions.yaml +0 -34
  95. sdg_hub/configs/skills/freeform_responses.yaml +0 -39
  96. sdg_hub/configs/skills/grounded_questions.yaml +0 -38
  97. sdg_hub/configs/skills/grounded_responses.yaml +0 -59
  98. sdg_hub/configs/skills/icl_examples/STEM.yaml +0 -56
  99. sdg_hub/configs/skills/icl_examples/__init__.py +0 -0
  100. sdg_hub/configs/skills/icl_examples/coding.yaml +0 -97
  101. sdg_hub/configs/skills/icl_examples/extraction.yaml +0 -36
  102. sdg_hub/configs/skills/icl_examples/humanities.yaml +0 -71
  103. sdg_hub/configs/skills/icl_examples/math.yaml +0 -85
  104. sdg_hub/configs/skills/icl_examples/reasoning.yaml +0 -30
  105. sdg_hub/configs/skills/icl_examples/roleplay.yaml +0 -45
  106. sdg_hub/configs/skills/icl_examples/writing.yaml +0 -80
  107. sdg_hub/configs/skills/judge.yaml +0 -53
  108. sdg_hub/configs/skills/planner.yaml +0 -67
  109. sdg_hub/configs/skills/respond.yaml +0 -8
  110. sdg_hub/configs/skills/revised_responder.yaml +0 -78
  111. sdg_hub/configs/skills/router.yaml +0 -59
  112. sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +0 -27
  113. sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +0 -31
  114. sdg_hub/flow.py +0 -477
  115. sdg_hub/flow_runner.py +0 -450
  116. sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +0 -13
  117. sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +0 -12
  118. sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +0 -89
  119. sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +0 -136
  120. sdg_hub/flows/generation/skills/improve_responses.yaml +0 -103
  121. sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +0 -12
  122. sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +0 -12
  123. sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +0 -80
  124. sdg_hub/flows/generation/skills/synth_skills.yaml +0 -59
  125. sdg_hub/pipeline.py +0 -121
  126. sdg_hub/prompts.py +0 -80
  127. sdg_hub/registry.py +0 -122
  128. sdg_hub/sdg.py +0 -206
  129. sdg_hub/utils/config_validation.py +0 -91
  130. sdg_hub/utils/error_handling.py +0 -94
  131. sdg_hub/utils/validation_result.py +0 -10
  132. sdg_hub-0.1.4.dist-info/METADATA +0 -190
  133. sdg_hub-0.1.4.dist-info/RECORD +0 -89
  134. sdg_hub/{logger_config.py → core/utils/logger_config.py} +1 -1
  135. /sdg_hub/{configs/__init__.py → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md} +0 -0
  136. /sdg_hub/{configs/annotations → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab}/__init__.py +0 -0
  137. {sdg_hub-0.1.4.dist-info → sdg_hub-0.2.0.dist-info}/WHEEL +0 -0
  138. {sdg_hub-0.1.4.dist-info → sdg_hub-0.2.0.dist-info}/licenses/LICENSE +0 -0
  139. {sdg_hub-0.1.4.dist-info → sdg_hub-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,126 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Melt columns block for wide-to-long format transformation.
3
+
4
+ This module provides a block for transforming wide dataset format into long format
5
+ by melting specified columns into rows.
6
+ """
7
+
8
+ # Standard
9
+ from typing import Any
10
+
11
+ # Third Party
12
+ from datasets import Dataset
13
+ from pydantic import field_validator
14
+
15
+ # Local
16
+ from ...utils.error_handling import MissingColumnError
17
+ from ...utils.logger_config import setup_logger
18
+ from ..base import BaseBlock
19
+ from ..registry import BlockRegistry
20
+
21
+ logger = setup_logger(__name__)
22
+
23
+
24
@BlockRegistry.register(
    "MeltColumnsBlock",
    "transform",
    "Transforms wide dataset format into long format by melting columns into rows",
)
class MeltColumnsBlock(BaseBlock):
    """Block for flattening multiple columns into a long format.

    This block transforms a wide dataset format into a long format by melting
    specified columns into rows, creating new variable and value columns.

    The input_cols should contain the columns to be melted (variable columns).
    The output_cols must specify exactly two columns: [value_column, variable_column].
    Any other columns in the dataset will be treated as ID columns and preserved.

    Attributes
    ----------
    block_name : str
        Name of the block.
    input_cols : Union[str, List[str], Dict[str, Any], None]
        Columns to be melted into rows (variable columns).
    output_cols : Union[str, List[str], Dict[str, Any], None]
        Output column specification. Must specify exactly two columns: [value_column, variable_column].
    """

    @field_validator("input_cols", mode="after")
    @classmethod
    def validate_input_cols(cls, v):
        """Validate that input_cols is not empty."""
        if not v:
            raise ValueError("input_cols cannot be empty")
        return v

    @field_validator("output_cols", mode="after")
    @classmethod
    def validate_output_cols(cls, v):
        """Validate that exactly two output columns are specified.

        Guard against ``None`` explicitly so a missing spec raises a clear
        ``ValueError`` instead of a ``TypeError`` from ``len(None)``.
        """
        if not v or len(v) != 2:
            count = len(v) if v else 0
            raise ValueError(
                f"MeltColumnsBlock expects exactly two output columns (value, variable), got {count}: {v}"
            )
        return v

    def model_post_init(self, __context: Any) -> None:
        """Initialize derived attributes after Pydantic validation."""
        # Pydantic's BaseModel defines model_post_init; the hasattr guard is
        # purely defensive in case the base hierarchy changes.
        if hasattr(super(), "model_post_init"):
            super().model_post_init(__context)

        # Derive value and variable column names from output_cols
        self.value_name = self.output_cols[0]  # First output column is value
        self.var_name = self.output_cols[1]  # Second output column is variable

        # input_cols contains the columns to be melted; normalize to a list
        self.var_cols = (
            self.input_cols if isinstance(self.input_cols, list) else [self.input_cols]
        )

    def _validate_custom(self, samples: Dataset) -> None:
        """Validate that required columns exist in the dataset.

        Parameters
        ----------
        samples : Dataset
            Input dataset to validate.

        Raises
        ------
        MissingColumnError
            If required columns are missing from the dataset.
        """
        # Check that all var_cols exist in the dataset
        missing_cols = list(set(self.var_cols) - set(samples.column_names))
        if missing_cols:
            raise MissingColumnError(
                block_name=self.block_name,
                missing_columns=missing_cols,
                available_columns=samples.column_names,
            )

    def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
        """Generate a flattened dataset in long format.

        Parameters
        ----------
        samples : Dataset
            Input dataset to flatten.

        Returns
        -------
        Dataset
            Flattened dataset in long format with new variable and value columns.
        """
        # Every column not being melted is treated as an ID column and preserved.
        df = samples.to_pandas()
        id_cols = [col for col in samples.column_names if col not in self.var_cols]
        flatten_df = df.melt(
            id_vars=id_cols,
            value_vars=self.var_cols,
            value_name=self.value_name,
            var_name=self.var_name,
        )
        return Dataset.from_pandas(flatten_df)
@@ -0,0 +1,69 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Rename columns block for dataset column renaming operations.
3
+
4
+ This module provides a block for renaming columns in datasets according
5
+ to a mapping specification.
6
+ """
7
+
8
+ # Standard
9
+ from typing import Any
10
+
11
+ # Third Party
12
+ from datasets import Dataset
13
+ from pydantic import field_validator
14
+
15
+ # Local
16
+ from ...utils.logger_config import setup_logger
17
+ from ..base import BaseBlock
18
+ from ..registry import BlockRegistry
19
+
20
+ logger = setup_logger(__name__)
21
+
22
+
23
@BlockRegistry.register(
    "RenameColumnsBlock",
    "transform",
    "Renames columns in a dataset according to a mapping specification",
)
class RenameColumnsBlock(BaseBlock):
    """Rename dataset columns via a mapping.

    The rename mapping is supplied through ``input_cols`` as a dictionary
    whose keys are the existing column names and whose values are the
    desired new names.

    Attributes
    ----------
    block_name : str
        Name of the block.
    input_cols : Dict[str, str]
        Mapping of existing column names to new column names.
    """

    @field_validator("input_cols", mode="after")
    @classmethod
    def validate_input_cols(cls, v):
        """Reject empty or non-dict rename mappings."""
        if not v:
            raise ValueError("input_cols cannot be empty")
        if not isinstance(v, dict):
            raise ValueError(
                "input_cols must be a dictionary mapping old column names to new column names"
            )
        return v

    def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
        """Return a copy of the dataset with columns renamed.

        Parameters
        ----------
        samples : Dataset
            Input dataset whose columns will be renamed.

        Returns
        -------
        Dataset
            Dataset with columns renamed per the input_cols mapping.
        """
        # Delegate to the HuggingFace datasets built-in rename operation.
        rename_map = self.input_cols
        return samples.rename_columns(rename_map)
@@ -0,0 +1,102 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Text concatenation block for dataset column combination operations.
3
+
4
+ This module provides a block for combining multiple columns into a single column
5
+ using a specified separator.
6
+ """
7
+
8
+ # Standard
9
+ from typing import Any
10
+
11
+ # Third Party
12
+ from datasets import Dataset
13
+ from pydantic import Field, field_validator
14
+
15
+ # Local
16
+ from ...utils.logger_config import setup_logger
17
+ from ..base import BaseBlock
18
+ from ..registry import BlockRegistry
19
+
20
+ logger = setup_logger(__name__)
21
+
22
+
23
@BlockRegistry.register(
    "TextConcatBlock",
    "transform",
    "Combines multiple columns into a single column using a specified separator",
)
class TextConcatBlock(BaseBlock):
    """Block for combining multiple columns into a single column.

    This block concatenates values from multiple columns into a single output
    column, using a specified separator between values.

    Attributes
    ----------
    block_name : str
        Name of the block.
    input_cols : list[str]
        List of column names to combine.
    output_cols : list[str]
        List containing the single output column name.
    separator : str
        String to use as separator between combined values.
    """

    separator: str = Field(
        default="\n\n", description="Separator to use between combined values"
    )

    @field_validator("input_cols", mode="after")
    @classmethod
    def validate_input_cols(cls, v):
        """Validate that input_cols is a non-empty list."""
        if not v:
            raise ValueError("input_cols cannot be empty")
        if not isinstance(v, list):
            raise ValueError("input_cols must be a list of column names")
        return v

    @field_validator("output_cols", mode="after")
    @classmethod
    def validate_output_cols(cls, v):
        """Validate that exactly one output column is specified."""
        if not v or len(v) != 1:
            raise ValueError("TextConcatBlock requires exactly one output column")
        return v

    def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
        """Generate a dataset with combined columns.

        Parameters
        ----------
        samples : Dataset
            Input dataset to process.

        Returns
        -------
        Dataset
            Dataset with combined values stored in output column.

        Raises
        ------
        ValueError
            If output_cols is unset or an input column is missing.
        """
        if not self.output_cols:
            raise ValueError("output_cols must be specified")

        output_col = self.output_cols[0]

        # Validate column presence once against the schema instead of
        # re-checking every sample inside the map callback.
        for col in self.input_cols:
            if col not in samples.column_names:
                raise ValueError(f"Input column '{col}' not found in sample")

        separator = self.separator
        input_cols = self.input_cols

        def _combine_columns(sample):
            """Join the input column values with the configured separator."""
            sample[output_col] = separator.join(
                str(sample[col]) for col in input_cols
            )
            return sample

        # Apply the combination to all samples
        return samples.map(_combine_columns)
@@ -0,0 +1,101 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Uniform column value setter block for replacing a column with a single statistic.
3
+
4
+ This block sets all values in a column to a single summary statistic:
5
+ mode, min, max, mean, or median.
6
+ """
7
+
8
+ # Standard
9
+ from typing import Any, Literal
10
+
11
+ # Third Party
12
+ from datasets import Dataset
13
+ from pydantic import field_validator
14
+ import numpy as np
15
+
16
+ # Local
17
+ from ...utils.logger_config import setup_logger
18
+ from ..base import BaseBlock
19
+ from ..registry import BlockRegistry
20
+
21
+ logger = setup_logger(__name__)
22
+
23
+
24
@BlockRegistry.register(
    "UniformColumnValueSetter",
    "transform",
    "Replaces all values in a column with a single summary statistic (e.g., mode, mean, median)",
)
class UniformColumnValueSetter(BaseBlock):
    """Block that replaces all values in a column with a single aggregate value.

    Supported strategies include: mode, min, max, mean, median.

    Attributes
    ----------
    block_name : str
        Name of the block.
    input_cols : Union[str, List[str]]
        Must specify exactly one input column.
    output_cols : Union[str, List[str]]
        Output column list. Ignored — modifies in place.
    reduction_strategy : Literal["mode", "min", "max", "mean", "median"]
        Strategy used to compute the replacement value.
    """

    reduction_strategy: Literal["mode", "min", "max", "mean", "median"] = "mode"

    @field_validator("input_cols", mode="after")
    @classmethod
    def validate_input_cols_single(cls, v):
        """Require exactly one input column."""
        if not v or len(v) != 1:
            raise ValueError(
                "UniformColumnValueSetter requires exactly one input column"
            )
        return v

    def model_post_init(self, __context: Any) -> None:
        """Derive the target column and warn away any output_cols spec."""
        if hasattr(super(), "model_post_init"):
            super().model_post_init(__context)

        # This block mutates the input column directly, so any requested
        # output columns are dropped with a warning.
        if self.output_cols and len(self.output_cols) > 0:
            logger.warning(
                f"UniformColumnValueSetter modifies columns in-place. "
                f"Specified output_cols {self.output_cols} will be ignored."
            )
            self.output_cols = []
        self.col_name = self.input_cols[0]

    def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
        """Replace every value in the target column with the reduction result.

        Parameters
        ----------
        samples : Dataset
            Input dataset; must be non-empty.

        Returns
        -------
        Dataset
            Dataset whose target column holds a single uniform value.

        Raises
        ------
        ValueError
            If the dataset is empty, the strategy is unknown, or the
            reduction produced no usable value (None/NaN).
        """
        df = samples.to_pandas()

        if df.empty:
            raise ValueError("Cannot compute reduction for empty dataset")

        col = df[self.col_name]

        strategy = self.reduction_strategy
        if strategy == "mode":
            # Compute the mode Series once (the original evaluated it twice).
            modes = col.mode()
            value = modes.iloc[0] if not modes.empty else None
        elif strategy == "min":
            value = col.min()
        elif strategy == "max":
            value = col.max()
        elif strategy == "mean":
            value = col.mean()
        elif strategy == "median":
            value = col.median()
        else:
            raise ValueError(f"Unsupported reduction strategy: {strategy}")

        # None (empty mode) or NaN (all-NaN numeric column) means the
        # reduction failed to produce a meaningful replacement.
        if value is None or (isinstance(value, float) and np.isnan(value)):
            raise ValueError(
                f"Could not compute {strategy} for column '{self.col_name}'"
            )

        logger.info(
            f"Replacing all values in column '{self.col_name}' with {strategy} value: '{value}'"
        )

        df[self.col_name] = value
        return Dataset.from_pandas(df)
@@ -0,0 +1,20 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """New flow implementation for SDG Hub.
3
+
4
+ This module provides a redesigned Flow class with metadata support,
5
+ dual initialization modes, and runtime parameter overrides.
6
+ """
7
+
8
+ # Local
9
+ from .base import Flow
10
+ from .metadata import FlowMetadata, FlowParameter
11
+ from .registry import FlowRegistry
12
+ from .validation import FlowValidator
13
+
14
+ __all__ = [
15
+ "Flow",
16
+ "FlowMetadata",
17
+ "FlowParameter",
18
+ "FlowRegistry",
19
+ "FlowValidator",
20
+ ]