PyPI - sdg-hub - Versions diffs - 0.4.2__tar.gz → 0.5.0__tar.gz - Mend

sdg-hub 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (225) hide show

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/.github/workflows/integration-test.yml RENAMED Viewed

@@ -7,29 +7,11 @@ on:
     branches:
       - "main"
       - "release-**"
-    paths:
-      # Only trigger on changes to relevant flows and examples (EXTEND THIS):
-      - 'src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/**'
-      - 'examples/knowledge_tuning/enhanced_summary_knowledge_tuning/**'
-      # Standard integration test triggers, DONT CHANGE THIS
-      - 'tests/integration/**/*.py'
-      - 'pyproject.toml'
-      - 'tox.ini'
-      - '.github/workflows/integration-test.yml'
   pull_request:
     branches:
       - "main"
       - "release-**"
     types: [opened, synchronize, reopened, labeled]
-    paths:
-      # Only trigger on changes to relevant flows and examples (EXTEND THIS):
-      - 'src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/**'
-      - 'examples/knowledge_tuning/enhanced_summary_knowledge_tuning/**'
-      # Standard integration test triggers, DONT CHANGE THIS
-      - 'tests/integration/**/*.py'
-      - 'pyproject.toml'
-      - 'tox.ini'
-      - '.github/workflows/integration-test.yml'
 env:
   LC_ALL: en_US.UTF-8
@@ -42,19 +24,58 @@ permissions:
   contents: read
 jobs:
+  check-trigger:
+    name: "Check If Integration Should Run"
+    runs-on: ubuntu-latest
+    outputs:
+      should_run: ${{ steps.check.outputs.should_run }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        if: github.event_name == 'pull_request'
+        with:
+          filters: |
+            relevant:
+              # Only trigger on changes to relevant flows and examples (EXTEND THIS):
+              - 'src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/**'
+              - 'examples/knowledge_tuning/enhanced_summary_knowledge_tuning/**'
+              # Standard integration test triggers, DONT CHANGE THIS
+              - 'tests/integration/**/*.py'
+              - 'pyproject.toml'
+              - 'tox.ini'
+              - '.github/workflows/integration-test.yml'
+      - name: Determine if tests should run
+        id: check
+        run: |
+          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]] || [[ "${{ github.event_name }}" == "push" ]]; then
+            echo "should_run=true" >> "$GITHUB_OUTPUT"
+          elif [[ "${{ github.event_name }}" == "pull_request" ]]; then
+            # Check if from fork
+            if [[ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]]; then
+              echo "should_run=false" >> "$GITHUB_OUTPUT"
+            # Check if labeled event with correct label
+            elif [[ "${{ github.event.action }}" == "labeled" ]] && [[ "${{ contains(github.event.pull_request.labels.*.name, 'run-integration-tests') }}" == "true" ]]; then
+              echo "should_run=true" >> "$GITHUB_OUTPUT"
+            # Check if relevant paths changed for non-labeled events
+            elif [[ "${{ github.event.action }}" != "labeled" ]] && [[ "${{ steps.filter.outputs.relevant }}" == "true" ]]; then
+              echo "should_run=true" >> "$GITHUB_OUTPUT"
+            else
+              echo "should_run=false" >> "$GITHUB_OUTPUT"
+            fi
+          else
+            echo "should_run=false" >> "$GITHUB_OUTPUT"
+          fi
   integration-test:
     name: "Integration Tests - ${{ matrix.python }} on ${{ matrix.platform }}"
     runs-on: "${{ matrix.platform }}"
+    needs: check-trigger
+    if: needs.check-trigger.outputs.should_run == 'true'
     # Require manual approval before running (via GitHub Environment)
     environment: integration-tests
-    # Skip fork PRs (they can't access environment secrets anyway)
-    # Also check for 'run-integration-tests' label on labeled events
-    if: |
-      github.event_name == 'workflow_dispatch' ||
-      github.event_name == 'push' ||
-      (github.event_name == 'pull_request' &&
-       github.event.pull_request.head.repo.full_name == github.repository &&
-       (github.event.action != 'labeled' || contains(github.event.pull_request.labels.*.name, 'run-integration-tests')))
     strategy:
       matrix:
         python:
@@ -89,12 +110,9 @@ jobs:
             **/pyproject.toml
             **/requirements*.txt
-      - name: Remove llama-cpp-python from cache
-        run: |
-          pip cache remove llama_cpp_python
       - name: Cache huggingface datasets
-        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
         with:
           path: ~/.cache/huggingface
           # Invalidate cache when any example notebook changes (may affect dataset downloads)
@@ -111,10 +129,6 @@ jobs:
         run: |
           tox -e py3-integrationcov
-      - name: Remove llama-cpp-python from cache
-        if: always()
-        run: |
-          pip cache remove llama_cpp_python
       - name: Upload integration test coverage to Codecov
         uses: codecov/codecov-action@v4

sdg_hub-0.5.0/.github/workflows/packer.yml ADDED Viewed

@@ -0,0 +1,33 @@
+name: Build AMI with Packer
+on:
+  workflow_dispatch:
+jobs:
+  build-ami:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write # This is required for OIDC
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@ff717079ee2060e4bcee96c4779b553acc87447c
+        with:
+          role-to-assume: arn:aws:iam::851725220677:role/github-actions-packer-role
+          aws-region: us-east-2
+          role-session-name: github-actions-packer  # For tracking in CloudTrail
+      - name: Setup Packer
+        uses: hashicorp/setup-packer@1aa358be5cf73883762b302a3a03abd66e75b232
+      - name: Build and create AMI
+        run: |
+          set -euo pipefail
+          cd scripts/packer
+          packer init .
+          packer validate .
+          packer build .

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/.github/workflows/test.yml RENAMED Viewed

@@ -86,16 +86,7 @@ jobs:
             **/pyproject.toml
             **/requirements*.txt
-      - name: Remove llama-cpp-python from cache
-        run: |
-          pip cache remove llama_cpp_python
-      - name: Cache huggingface
-        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
-        with:
-          path: ~/.cache/huggingface
-          # config contains DEFAULT_MODEL
-          key: huggingface-${{ hashFiles('src/instructlab/configuration.py') }}
       - name: Install dependencies
         run: |
@@ -107,10 +98,6 @@ jobs:
           tox -e py3-unitcov
-      - name: Remove llama-cpp-python from cache
-        if: always()
-        run: |
-          pip cache remove llama_cpp_python
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/CLAUDE.md RENAMED Viewed

@@ -86,7 +86,6 @@ The framework is built around a modular block system with **composability at its
   - `transform/`: Data transformation blocks (column operations, text manipulation)
   - `filtering/`: Data filtering blocks with quality thresholds
   - `evaluation/`: Quality evaluation blocks (faithfulness, relevancy assessment)
-  - `deprecated_blocks/`: Legacy blocks maintained for backward compatibility
 **Key Benefits**: Type-safe composition, automatic validation, rich logging, and high-performance async processing.
@@ -97,7 +96,6 @@ Flows orchestrate multiple blocks into data processing pipelines:
 - **FlowRegistry** (`src/sdg_hub/core/flow/registry.py`): Registry for flow discovery
 - **FlowMetadata** (`src/sdg_hub/core/flow/metadata.py`): Metadata and parameter definitions
 - **FlowValidator** (`src/sdg_hub/core/flow/validation.py`): YAML structure validation
-- **FlowMigration** (`src/sdg_hub/core/flow/migration.py`): Backward compatibility for old flow formats
 ### Flow Configuration
 Flows are defined in YAML files with this structure:
@@ -148,11 +146,6 @@ All blocks operate on HuggingFace `datasets.Dataset` objects:
 - Rich logging provides processing summaries
 - Empty dataset handling with appropriate errors
-### Backward Compatibility
-The framework maintains compatibility with legacy formats:
-- Deprecated blocks are preserved in `deprecated_blocks/`
-- Flow migration automatically converts old YAML formats
-- Legacy LLMBlocks receive special handling during execution
 ## Testing Guidelines

{sdg_hub-0.4.2/src/sdg_hub.egg-info → sdg_hub-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sdg_hub
-Version: 0.4.2
+Version: 0.5.0
 Summary: Synthetic Data Generation
 Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
 License: Apache-2.0
@@ -23,7 +23,7 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click<9.0.0,>=8.1.7
-Requires-Dist: datasets<4.0.0,>=2.18.0
+Requires-Dist: datasets>=4.0.0
 Requires-Dist: httpx<1.0.0,>=0.25.0
 Requires-Dist: jinja2
 Requires-Dist: litellm<1.75.0,>=1.73.0

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/knowledge_mixing.ipynb RENAMED Viewed

@@ -359,7 +359,7 @@
     "processed_knowledge_dataset = processed_knowledge_dataset.remove_columns(['messages']).rename_column('messages_without_think', 'messages')\n",
     "\n",
     "cfg = RAFTConfig(k_passages=5, max_tokens_per_chunk=400, p_include_oracle=0.9)\n",
-    "raft_samples = build_raft_samples(ds, cfg)\n",
+    "raft_samples = build_raft_samples(processed_knowledge_dataset, cfg)\n",
     "raft_samples = raft_samples.map(build_messages).remove_columns(['question', 'context', 'oracle_context', 'cot_answer', 'answer', 'instruction', 'type', 'meta'])\n",
     "\n",
     "fp = \"<Instruction/Skills dataset>\" # TODO: Replace with huggingface dataset path once its uploaded\n",

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/examples/knowledge_tuning/enhanced_summary_knowledge_tuning/raft_builder.py RENAMED Viewed

@@ -237,20 +237,13 @@ def build_messages(raft_record: Dict[str, Any]):
     Output:
       messages: list of {"role": "system"|"user"|"assistant", "content": str}
     """
-    # 1. System message
-    sys_msg = raft_record.get("instruction") or (
-        "You are a domain expert. You must answer questions by first quoting a span "
-        "verbatim from the relevant passage, then giving reasoning, then the final answer. "
-        "Ignore distractor passages."
-    )
-    # 2. User message: serialize passages + question
+    # 1. User message: serialize passages + question
     passages = "\n\n".join(
         [f"[Passage {i+1}] {p}" for i, p in enumerate(raft_record["context"])]
     )
     user_msg = f"Passages:\n{passages}\n\nQuestion: {raft_record['question']}"
-    # 3. Assistant message: the gold output
+    # 2. Assistant message: the gold output
     assistant_msg = raft_record["answer"]
     return {"messages" : [

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/pyproject.toml RENAMED Viewed

@@ -30,7 +30,7 @@ classifiers = [
 # Core dependencies moved from requirements.txt
 dependencies = [
     "click>=8.1.7,<9.0.0",
-    "datasets>=2.18.0,<4.0.0",
+    "datasets>=4.0.0",
     "httpx>=0.25.0,<1.0.0",
     "jinja2",
     "litellm>=1.73.0,<1.75.0",

sdg_hub-0.5.0/scripts/packer/centos.pkr.hcl ADDED Viewed

@@ -0,0 +1,52 @@
+packer {
+  required_plugins {
+    amazon = {
+      version = ">= 1.2.8"
+      source  = "github.com/hashicorp/amazon"
+    }
+  }
+}
+variable "github_sha" {
+  type        = string
+  description = "GitHub commit SHA to tag the AMI with"
+  default     = env("GITHUB_SHA")
+}
+variable "github_repository" {
+  type        = string
+  description = "GitHub repository name to tag the AMI with"
+  default     = env("GITHUB_REPOSITORY")
+}
+source "amazon-ebs" "centos" {
+  ami_name      = "github-actions-centos-nvidia-ami-{{timestamp}}"
+  # Use the lowest-cost instance type that can efficiently build and sanity-check the driver.
+  # It should be old enough to be low-cost, but new enough to be compatible with our desired driver version.
+  instance_type = "g6.xlarge"
+  region        = "us-east-2"
+  source_ami_filter {
+    filters = {
+      name                = "CentOS Stream 9 x86_64*"
+      root-device-type    = "ebs"
+      virtualization-type = "hvm"
+    }
+    most_recent = true
+    owners      = ["125523088429"] # CentOS CPE team ID.
+  }
+  ssh_username = "ec2-user"
+  tags = {
+    Name = "CentOS Stream 9 with Nvidia Drivers"
+    BuiltBy = "Packer"
+    GitHubCommitSHA = var.github_sha
+    GitHubRepository = var.github_repository
+  }
+}
+build {
+  sources = ["source.amazon-ebs.centos"]
+  provisioner "shell" {
+    script = "./setup-centos.sh"
+    execute_command = "sudo bash {{.Path}}"
+  }
+}

sdg_hub-0.5.0/scripts/packer/setup-centos.sh ADDED Viewed

@@ -0,0 +1,80 @@
+#!/bin/bash
+# Setup script for CentOS GitHub Actions AMI
+# Derived from:
+# github.com/containers/ai-lab-recipes/blob/main/training/nvidia-bootc/Containerfile
+set -euxo pipefail
+DRIVER_VERSION="580.65.06"
+# CUDA_VERSION is embedded in the driver "local repo" package
+if [[ $(id -u) != "0" ]]; then
+    echo "you must run this script as root."
+    exit 1
+fi
+function configure_dnf {
+  # Configure the DNF repos and options we need for CI.
+  dnf -y install dnf-plugins-core
+  dnf config-manager --save \
+    --setopt=skip_missing_names_on_install=False \
+    --setopt=install_weak_deps=False
+  dnf -y install epel-release
+  dnf -y install https://us.download.nvidia.com/tesla/$DRIVER_VERSION/nvidia-driver-local-repo-rhel9-$DRIVER_VERSION-1.0-1.x86_64.rpm
+  # TODO: We might be able to use a nvidia.com yum repo instead of the local repo?
+  # dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel${OS_VERSION_MAJOR}/${CUDA_REPO_ARCH}/cuda-rhel${OS_VERSION_MAJOR}.repo
+}
+function install_userland_packages {
+  # CI tests in GH Actions will require these packages:
+  dnf -y install nvtop podman skopeo git python3.12 python3.12-devel
+}
+function install_kernel_driver {
+  # Install nvidia kernel driver.
+  # DKMS will compile the nvidia.ko driver for all kernels for which we have installed a kernel-devel package.
+  # By default, the "dnf module install" command will install the latest kernel-devel package that CentOS has published.
+  dnf -y install "kernel-devel-$(uname -r)" gcc make dkms elfutils-libelf-devel  # also build for the currently-running kernel.
+  # If we had configured a previous nvidia-driver version with DNF, reset it:
+  dnf -y module reset nvidia-driver || true
+  DRIVER_STREAM=$(echo $DRIVER_VERSION | cut -d. -f1)
+  dnf -y module install nvidia-driver:${DRIVER_STREAM}-dkms # or use :latest-dkms after confirming available streams
+}
+function test_kernel_driver {
+  # The nvidia driver DNF module (above) installs a dkms RPM.
+  # That dkms RPM compiles and installs the nvidia.ko module.
+  # List all the modules that dkms has compiled:
+  dkms status || true
+  # Load the module (ok if it’s already loaded or unavailable for this kernel):
+  modprobe -q nvidia || true
+  # If a GPU is present, verify userspace; otherwise, fail the job:
+  nvidia-smi
+}
+function install_container_toolkit {
+  # Install nvidia container toolkit.
+  # When we pass GPU devices to a container (podman run --device nvidia.com/gpu=all), we use the nvidia CTK to do that.
+  # See docs at https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
+  curl -sSfL -o /etc/yum.repos.d/nvidia-container-toolkit.repo \
+    https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo
+  dnf config-manager --enable nvidia-container-toolkit-experimental
+  export NVIDIA_CONTAINER_TOOLKIT_VERSION=1.17.8-1
+  dnf install -y \
+      nvidia-container-toolkit-${NVIDIA_CONTAINER_TOOLKIT_VERSION} \
+      nvidia-container-toolkit-base-${NVIDIA_CONTAINER_TOOLKIT_VERSION} \
+      libnvidia-container-tools-${NVIDIA_CONTAINER_TOOLKIT_VERSION} \
+      libnvidia-container1-${NVIDIA_CONTAINER_TOOLKIT_VERSION}
+  # Verify it works:
+  nvidia-ctk --version
+  # When you boot a node, you must run:
+  #   sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
+  # This command scans your system for NVIDIA GPUs and creates a YAML file that lists the available devices.
+}
+configure_dnf
+install_userland_packages
+install_kernel_driver
+test_kernel_driver
+install_container_toolkit

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/src/sdg_hub/_version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.4.2'
-__version_tuple__ = version_tuple = (0, 4, 2)
+__version__ = version = '0.5.0'
+__version_tuple__ = version_tuple = (0, 5, 0)
-__commit_id__ = commit_id = 'gfbb2504ba'
+__commit_id__ = commit_id = 'ge1e260984'

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/src/sdg_hub/core/blocks/__init__.py RENAMED Viewed

@@ -5,17 +5,6 @@ This package provides various block implementations for data generation, process
 # Local
 from .base import BaseBlock
-from .deprecated_blocks import (
-    CombineColumnsBlock,
-    DuplicateColumns,
-    FilterByValueBlock,
-    FlattenColumnsBlock,
-    LLMBlock,
-    RenameColumns,
-    SamplePopulatorBlock,
-    SelectorBlock,
-    SetToMajorityValue,
-)
 from .filtering import ColumnValueFilterBlock
 from .llm import LLMChatBlock, LLMParserBlock, PromptBuilderBlock, TextParserBlock
 from .registry import BlockRegistry
@@ -28,8 +17,6 @@ from .transform import (
     UniformColumnValueSetter,
 )
-# All blocks moved to deprecated_blocks or transform modules
 __all__ = [
     "BaseBlock",
     "BlockRegistry",
@@ -40,15 +27,6 @@ __all__ = [
     "RenameColumnsBlock",
     "TextConcatBlock",
     "UniformColumnValueSetter",
-    "CombineColumnsBlock",  # Deprecated
-    "DuplicateColumns",  # Deprecated
-    "FilterByValueBlock",  # Deprecated
-    "FlattenColumnsBlock",  # Deprecated
-    "RenameColumns",  # Deprecated
-    "SamplePopulatorBlock",  # Deprecated
-    "SelectorBlock",  # Deprecated
-    "SetToMajorityValue",  # Deprecated
-    "LLMBlock",  # Deprecated
     "LLMChatBlock",
     "LLMParserBlock",
     "TextParserBlock",

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/src/sdg_hub/core/blocks/transform/rename_columns.py RENAMED Viewed

@@ -64,6 +64,25 @@ class RenameColumnsBlock(BaseBlock):
         -------
         Dataset
             Dataset with renamed columns.
+        Raises
+        ------
+        ValueError
+            If attempting to rename to a column name that already exists.
         """
+        # Check for column name collisions
+        # Strict validation: no target column name can be an existing column name
+        # This prevents chained/circular renames which can be confusing
+        existing_cols = set(samples.column_names)
+        target_cols = set(self.input_cols.values())
+        collision = target_cols & existing_cols
+        if collision:
+            raise ValueError(
+                f"Cannot rename to existing column names: {sorted(collision)}. "
+                "Target column names must not already exist in the dataset. "
+                "Chained renames are not supported."
+            )
         # Rename columns using HuggingFace datasets method
         return samples.rename_columns(self.input_cols)

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/src/sdg_hub/core/flow/base.py RENAMED Viewed

@@ -41,7 +41,6 @@ from ..utils.time_estimator import estimate_execution_time
 from ..utils.yaml_utils import save_flow_yaml
 from .checkpointer import FlowCheckpointer
 from .metadata import DatasetRequirements, FlowMetadata
-from .migration import FlowMigration
 from .validation import FlowValidator
 logger = setup_logger(__name__)
@@ -73,8 +72,6 @@ class Flow(BaseModel):
     model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
     # Private attributes (not serialized)
-    _migrated_runtime_params: dict[str, dict[str, Any]] = {}
-    _llm_client: Any = None  # Only used for backward compatibility with old YAMLs
     _model_config_set: bool = False  # Track if model configuration has been set
     _block_metrics: list[dict[str, Any]] = PrivateAttr(
         default_factory=list
@@ -113,16 +110,13 @@ class Flow(BaseModel):
         return self
     @classmethod
-    def from_yaml(cls, yaml_path: str, client: Any = None) -> "Flow":
+    def from_yaml(cls, yaml_path: str) -> "Flow":
         """Load flow from YAML configuration file.
         Parameters
         ----------
         yaml_path : str
             Path to the YAML flow configuration file.
-        client : Any, optional
-            LLM client instance. Required for backward compatibility with old format YAMLs
-            that use deprecated LLMBlocks. Ignored for new format YAMLs.
         Returns
         -------
@@ -153,21 +147,6 @@ class Flow(BaseModel):
         except yaml.YAMLError as exc:
             raise FlowValidationError(f"Invalid YAML in {yaml_path}: {exc}") from exc
-        # Check if this is an old format flow and migrate if necessary
-        migrated_runtime_params = None
-        is_old_format = FlowMigration.is_old_format(flow_config)
-        if is_old_format:
-            logger.info(f"Detected old format flow, migrating: {yaml_path}")
-            if client is None:
-                logger.warning(
-                    "Old format YAML detected but no client provided. LLMBlocks may fail."
-                )
-            flow_config, migrated_runtime_params = FlowMigration.migrate_to_new_format(
-                flow_config, yaml_path
-            )
-            # Save migrated config back to YAML to persist id
-            save_flow_yaml(yaml_path, flow_config, "migrated to new format")
         # Validate YAML structure
         validator = FlowValidator()
         validation_errors = validator.validate_yaml_structure(flow_config)
@@ -194,19 +173,6 @@ class Flow(BaseModel):
         for i, block_config in enumerate(block_configs):
             try:
-                # Inject client for deprecated LLMBlocks if this is an old format flow
-                if (
-                    is_old_format
-                    and block_config.get("block_type") == "LLMBlock"
-                    and client is not None
-                ):
-                    if "block_config" not in block_config:
-                        block_config["block_config"] = {}
-                    block_config["block_config"]["client"] = client
-                    logger.debug(
-                        f"Injected client for deprecated LLMBlock: {block_config['block_config'].get('block_name')}"
-                    )
                 block = cls._create_block_from_config(block_config, yaml_dir)
                 blocks.append(block)
             except Exception as exc:
@@ -228,12 +194,6 @@ class Flow(BaseModel):
                 )
             else:
                 logger.debug(f"Flow already had id: {flow.metadata.id}")
-            # Store migrated runtime params and client for backward compatibility
-            if migrated_runtime_params:
-                flow._migrated_runtime_params = migrated_runtime_params
-            if is_old_format and client is not None:
-                flow._llm_client = client
             # Check if this is a flow without LLM blocks
             llm_blocks = flow._detect_llm_blocks()
             if not llm_blocks:
@@ -484,12 +444,6 @@ class Flow(BaseModel):
         self._block_metrics = []
         run_start = time.perf_counter()
-        # Merge migrated runtime params with provided ones (provided ones take precedence)
-        merged_runtime_params = self._migrated_runtime_params.copy()
-        if runtime_params:
-            merged_runtime_params.update(runtime_params)
-        runtime_params = merged_runtime_params
         # Execute flow with metrics capture, ensuring metrics are always displayed/saved
         final_dataset = None
         execution_successful = False
@@ -647,22 +601,8 @@ class Flow(BaseModel):
             input_cols = set(current_dataset.column_names)
             try:
-                # Check if this is a deprecated block and skip validations
-                is_deprecated_block = (
-                    hasattr(block, "__class__")
-                    and hasattr(block.__class__, "__module__")
-                    and "deprecated_blocks" in block.__class__.__module__
-                )
-                if is_deprecated_block:
-                    exec_logger.debug(
-                        f"Skipping validations for deprecated block: {block.block_name}"
-                    )
-                    # Call generate() directly to skip validations, but keep the runtime params
-                    current_dataset = block.generate(current_dataset, **block_kwargs)
-                else:
-                    # Execute block with validation and logging
-                    current_dataset = block(current_dataset, **block_kwargs)
+                # Execute block with validation and logging
+                current_dataset = block(current_dataset, **block_kwargs)
                 # Validate output
                 if len(current_dataset) == 0:
@@ -724,9 +664,11 @@ class Flow(BaseModel):
         return current_dataset
     def _prepare_block_kwargs(
-        self, block: BaseBlock, runtime_params: dict[str, dict[str, Any]]
+        self, block: BaseBlock, runtime_params: Optional[dict[str, dict[str, Any]]]
     ) -> dict[str, Any]:
         """Prepare execution parameters for a block."""
+        if runtime_params is None:
+            return {}
         return runtime_params.get(block.block_name, {})
     def set_model_config(
@@ -1114,22 +1056,8 @@ class Flow(BaseModel):
                 if max_concurrency is not None:
                     block_kwargs["_flow_max_concurrency"] = max_concurrency
-                # Check if this is a deprecated block and skip validations
-                is_deprecated_block = (
-                    hasattr(block, "__class__")
-                    and hasattr(block.__class__, "__module__")
-                    and "deprecated_blocks" in block.__class__.__module__
-                )
-                if is_deprecated_block:
-                    logger.debug(
-                        f"Dry run: Skipping validations for deprecated block: {block.block_name}"
-                    )
-                    # Call generate() directly to skip validations, but keep the runtime params
-                    current_dataset = block.generate(current_dataset, **block_kwargs)
-                else:
-                    # Execute block with validation and logging
-                    current_dataset = block(current_dataset, **block_kwargs)
+                # Execute block with validation and logging
+                current_dataset = block(current_dataset, **block_kwargs)
                 block_execution_time = (
                     time.perf_counter() - block_start_time

{sdg_hub-0.4.2 → sdg_hub-0.5.0}/src/sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml RENAMED Viewed

@@ -77,9 +77,13 @@ blocks:
     - ''
 - block_type: RenameColumnsBlock
   block_config:
-    block_name: rename_to_document_column
+    block_name: rename_to_raw_document_column
     input_cols:
       document: raw_document
+- block_type: RenameColumnsBlock
+  block_config:
+    block_name: rename_to_document_column
+    input_cols:
       summary: document
 - block_type: PromptBuilderBlock
   block_config:

sdg-hub 0.4.2__tar.gz → 0.5.0__tar.gz

sdg-hub 0.4.2__tar.gz → 0.5.0__tar.gz