PyPI - sdg-hub - Versions diffs - 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

sdg-hub 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (145) hide show

sdg_hub/__init__.py +28 -1
sdg_hub/_version.py +2 -2
sdg_hub/core/__init__.py +22 -0
sdg_hub/core/blocks/__init__.py +58 -0
sdg_hub/core/blocks/base.py +313 -0
sdg_hub/core/blocks/deprecated_blocks/__init__.py +29 -0
sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +93 -0
sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +88 -0
sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +103 -0
sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +94 -0
sdg_hub/core/blocks/deprecated_blocks/llmblock.py +479 -0
sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +88 -0
sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +58 -0
sdg_hub/core/blocks/deprecated_blocks/selector.py +97 -0
sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +88 -0
sdg_hub/core/blocks/evaluation/__init__.py +9 -0
sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +564 -0
sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +564 -0
sdg_hub/core/blocks/evaluation/verify_question_block.py +564 -0
sdg_hub/core/blocks/filtering/__init__.py +12 -0
sdg_hub/core/blocks/filtering/column_value_filter.py +188 -0
sdg_hub/core/blocks/llm/__init__.py +27 -0
sdg_hub/core/blocks/llm/client_manager.py +398 -0
sdg_hub/core/blocks/llm/config.py +336 -0
sdg_hub/core/blocks/llm/error_handler.py +368 -0
sdg_hub/core/blocks/llm/llm_chat_block.py +542 -0
sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +491 -0
sdg_hub/core/blocks/llm/prompt_builder_block.py +368 -0
sdg_hub/core/blocks/llm/text_parser_block.py +357 -0
sdg_hub/core/blocks/registry.py +331 -0
sdg_hub/core/blocks/transform/__init__.py +23 -0
sdg_hub/core/blocks/transform/duplicate_columns.py +88 -0
sdg_hub/core/blocks/transform/index_based_mapper.py +225 -0
sdg_hub/core/blocks/transform/melt_columns.py +126 -0
sdg_hub/core/blocks/transform/rename_columns.py +69 -0
sdg_hub/core/blocks/transform/text_concat.py +102 -0
sdg_hub/core/blocks/transform/uniform_col_val_setter.py +101 -0
sdg_hub/core/flow/__init__.py +20 -0
sdg_hub/core/flow/base.py +1209 -0
sdg_hub/core/flow/checkpointer.py +333 -0
sdg_hub/core/flow/metadata.py +389 -0
sdg_hub/core/flow/migration.py +198 -0
sdg_hub/core/flow/registry.py +393 -0
sdg_hub/core/flow/validation.py +277 -0
sdg_hub/{utils → core/utils}/__init__.py +7 -4
sdg_hub/core/utils/datautils.py +63 -0
sdg_hub/core/utils/error_handling.py +208 -0
sdg_hub/core/utils/flow_id_words.yaml +231 -0
sdg_hub/core/utils/flow_identifier.py +94 -0
sdg_hub/{utils → core/utils}/path_resolution.py +2 -2
sdg_hub/core/utils/yaml_utils.py +59 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +40 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +13 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +64 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +29 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +81 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +13 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +192 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +54 -0
sdg_hub-0.2.1.dist-info/METADATA +221 -0
sdg_hub-0.2.1.dist-info/RECORD +68 -0
sdg_hub/blocks/__init__.py +0 -42
sdg_hub/blocks/block.py +0 -96
sdg_hub/blocks/llmblock.py +0 -375
sdg_hub/blocks/openaichatblock.py +0 -556
sdg_hub/blocks/utilblocks.py +0 -597
sdg_hub/checkpointer.py +0 -139
sdg_hub/configs/annotations/cot_reflection.yaml +0 -34
sdg_hub/configs/annotations/detailed_annotations.yaml +0 -28
sdg_hub/configs/annotations/detailed_description.yaml +0 -10
sdg_hub/configs/annotations/detailed_description_icl.yaml +0 -32
sdg_hub/configs/annotations/simple_annotations.yaml +0 -9
sdg_hub/configs/knowledge/__init__.py +0 -0
sdg_hub/configs/knowledge/atomic_facts.yaml +0 -46
sdg_hub/configs/knowledge/auxilary_instructions.yaml +0 -35
sdg_hub/configs/knowledge/detailed_summary.yaml +0 -18
sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +0 -68
sdg_hub/configs/knowledge/evaluate_question.yaml +0 -38
sdg_hub/configs/knowledge/evaluate_relevancy.yaml +0 -84
sdg_hub/configs/knowledge/extractive_summary.yaml +0 -18
sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +0 -39
sdg_hub/configs/knowledge/generate_questions.yaml +0 -82
sdg_hub/configs/knowledge/generate_questions_responses.yaml +0 -56
sdg_hub/configs/knowledge/generate_responses.yaml +0 -86
sdg_hub/configs/knowledge/mcq_generation.yaml +0 -83
sdg_hub/configs/knowledge/router.yaml +0 -12
sdg_hub/configs/knowledge/simple_generate_qa.yaml +0 -34
sdg_hub/configs/reasoning/__init__.py +0 -0
sdg_hub/configs/reasoning/dynamic_cot.yaml +0 -40
sdg_hub/configs/skills/__init__.py +0 -0
sdg_hub/configs/skills/analyzer.yaml +0 -48
sdg_hub/configs/skills/annotation.yaml +0 -36
sdg_hub/configs/skills/contexts.yaml +0 -28
sdg_hub/configs/skills/critic.yaml +0 -60
sdg_hub/configs/skills/evaluate_freeform_pair.yaml +0 -111
sdg_hub/configs/skills/evaluate_freeform_questions.yaml +0 -78
sdg_hub/configs/skills/evaluate_grounded_pair.yaml +0 -119
sdg_hub/configs/skills/evaluate_grounded_questions.yaml +0 -51
sdg_hub/configs/skills/freeform_questions.yaml +0 -34
sdg_hub/configs/skills/freeform_responses.yaml +0 -39
sdg_hub/configs/skills/grounded_questions.yaml +0 -38
sdg_hub/configs/skills/grounded_responses.yaml +0 -59
sdg_hub/configs/skills/icl_examples/STEM.yaml +0 -56
sdg_hub/configs/skills/icl_examples/__init__.py +0 -0
sdg_hub/configs/skills/icl_examples/coding.yaml +0 -97
sdg_hub/configs/skills/icl_examples/extraction.yaml +0 -36
sdg_hub/configs/skills/icl_examples/humanities.yaml +0 -71
sdg_hub/configs/skills/icl_examples/math.yaml +0 -85
sdg_hub/configs/skills/icl_examples/reasoning.yaml +0 -30
sdg_hub/configs/skills/icl_examples/roleplay.yaml +0 -45
sdg_hub/configs/skills/icl_examples/writing.yaml +0 -80
sdg_hub/configs/skills/judge.yaml +0 -53
sdg_hub/configs/skills/planner.yaml +0 -67
sdg_hub/configs/skills/respond.yaml +0 -8
sdg_hub/configs/skills/revised_responder.yaml +0 -78
sdg_hub/configs/skills/router.yaml +0 -59
sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +0 -27
sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +0 -31
sdg_hub/flow.py +0 -477
sdg_hub/flow_runner.py +0 -450
sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +0 -13
sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +0 -12
sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +0 -89
sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +0 -136
sdg_hub/flows/generation/skills/improve_responses.yaml +0 -103
sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +0 -12
sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +0 -12
sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +0 -80
sdg_hub/flows/generation/skills/synth_skills.yaml +0 -59
sdg_hub/pipeline.py +0 -121
sdg_hub/prompts.py +0 -80
sdg_hub/registry.py +0 -122
sdg_hub/sdg.py +0 -206
sdg_hub/utils/config_validation.py +0 -91
sdg_hub/utils/datautils.py +0 -14
sdg_hub/utils/error_handling.py +0 -94
sdg_hub/utils/validation_result.py +0 -10
sdg_hub-0.1.4.dist-info/METADATA +0 -190
sdg_hub-0.1.4.dist-info/RECORD +0 -89
sdg_hub/{logger_config.py → core/utils/logger_config.py} +1 -1
/sdg_hub/{configs/__init__.py → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md} +0 -0
/sdg_hub/{configs/annotations → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab}/__init__.py +0 -0
{sdg_hub-0.1.4.dist-info → sdg_hub-0.2.1.dist-info}/WHEEL +0 -0
{sdg_hub-0.1.4.dist-info → sdg_hub-0.2.1.dist-info}/licenses/LICENSE +0 -0
{sdg_hub-0.1.4.dist-info → sdg_hub-0.2.1.dist-info}/top_level.txt +0 -0

sdg_hub/core/blocks/transform/uniform_col_val_setter.py ADDED Viewed

@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: Apache-2.0
+"""Uniform column value setter block for replacing a column with a single statistic.
+This block sets all values in a column to a single summary statistic:
+mode, min, max, mean, or median.
+"""
+# Standard
+from typing import Any, Literal
+# Third Party
+from datasets import Dataset
+from pydantic import field_validator
+import numpy as np
+# Local
+from ...utils.logger_config import setup_logger
+from ..base import BaseBlock
+from ..registry import BlockRegistry
+logger = setup_logger(__name__)
+@BlockRegistry.register(
+    "UniformColumnValueSetter",
+    "transform",
+    "Replaces all values in a column with a single summary statistic (e.g., mode, mean, median)",
+)
+class UniformColumnValueSetter(BaseBlock):
+    """Block that replaces all values in a column with a single aggregate value.
+    Supported strategies include: mode, min, max, mean, median.
+    Attributes
+    ----------
+    block_name : str
+        Name of the block.
+    input_cols : Union[str, List[str]]
+        Must specify exactly one input column.
+    output_cols : Union[str, List[str]]
+        Output column list. Ignored — modifies in place.
+    reduction_strategy : Literal["mode", "min", "max", "mean", "median"]
+        Strategy used to compute the replacement value.
+    """
+    reduction_strategy: Literal["mode", "min", "max", "mean", "median"] = "mode"
+    @field_validator("input_cols", mode="after")
+    @classmethod
+    def validate_input_cols_single(cls, v):
+        if not v or len(v) != 1:
+            raise ValueError(
+                "UniformColumnValueSetter requires exactly one input column"
+            )
+        return v
+    def model_post_init(self, __context: Any) -> None:
+        if hasattr(super(), "model_post_init"):
+            super().model_post_init(__context)
+        if self.output_cols and len(self.output_cols) > 0:
+            logger.warning(
+                f"UniformColumnValueSetter modifies columns in-place. "
+                f"Specified output_cols {self.output_cols} will be ignored."
+            )
+        self.output_cols = []
+        self.col_name = self.input_cols[0]
+    def generate(self, samples: Dataset, **kwargs: Any) -> Dataset:
+        df = samples.to_pandas()
+        if df.empty:
+            raise ValueError("Cannot compute reduction for empty dataset")
+        col = df[self.col_name]
+        strategy = self.reduction_strategy
+        if strategy == "mode":
+            value = col.mode().iloc[0] if not col.mode().empty else None
+        elif strategy == "min":
+            value = col.min()
+        elif strategy == "max":
+            value = col.max()
+        elif strategy == "mean":
+            value = col.mean()
+        elif strategy == "median":
+            value = col.median()
+        else:
+            raise ValueError(f"Unsupported reduction strategy: {strategy}")
+        if value is None or (isinstance(value, float) and np.isnan(value)):
+            raise ValueError(
+                f"Could not compute {strategy} for column '{self.col_name}'"
+            )
+        logger.info(
+            f"Replacing all values in column '{self.col_name}' with {strategy} value: '{value}'"
+        )
+        df[self.col_name] = value
+        return Dataset.from_pandas(df)

sdg_hub/core/flow/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+"""New flow implementation for SDG Hub.
+This module provides a redesigned Flow class with metadata support,
+dual initialization modes, and runtime parameter overrides.
+"""
+# Local
+from .base import Flow
+from .metadata import FlowMetadata, FlowParameter
+from .registry import FlowRegistry
+from .validation import FlowValidator
+# Public names of this package; each one is imported from a submodule above.
+__all__ = [
+    "Flow",
+    "FlowMetadata",
+    "FlowParameter",
+    "FlowRegistry",
+    "FlowValidator",
+]

sdg-hub 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl

sdg-hub 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl