PyPI - sdg-hub - Versions diffs - 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

sdg-hub 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (139) hide show

sdg_hub/__init__.py +28 -1
sdg_hub/_version.py +2 -2
sdg_hub/core/__init__.py +22 -0
sdg_hub/core/blocks/__init__.py +58 -0
sdg_hub/core/blocks/base.py +313 -0
sdg_hub/core/blocks/deprecated_blocks/__init__.py +29 -0
sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +93 -0
sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +88 -0
sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +103 -0
sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +94 -0
sdg_hub/core/blocks/deprecated_blocks/llmblock.py +479 -0
sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +88 -0
sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +58 -0
sdg_hub/core/blocks/deprecated_blocks/selector.py +97 -0
sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +88 -0
sdg_hub/core/blocks/evaluation/__init__.py +9 -0
sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +564 -0
sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +564 -0
sdg_hub/core/blocks/evaluation/verify_question_block.py +564 -0
sdg_hub/core/blocks/filtering/__init__.py +12 -0
sdg_hub/core/blocks/filtering/column_value_filter.py +188 -0
sdg_hub/core/blocks/llm/__init__.py +25 -0
sdg_hub/core/blocks/llm/client_manager.py +398 -0
sdg_hub/core/blocks/llm/config.py +336 -0
sdg_hub/core/blocks/llm/error_handler.py +368 -0
sdg_hub/core/blocks/llm/llm_chat_block.py +542 -0
sdg_hub/core/blocks/llm/prompt_builder_block.py +368 -0
sdg_hub/core/blocks/llm/text_parser_block.py +310 -0
sdg_hub/core/blocks/registry.py +331 -0
sdg_hub/core/blocks/transform/__init__.py +23 -0
sdg_hub/core/blocks/transform/duplicate_columns.py +88 -0
sdg_hub/core/blocks/transform/index_based_mapper.py +225 -0
sdg_hub/core/blocks/transform/melt_columns.py +126 -0
sdg_hub/core/blocks/transform/rename_columns.py +69 -0
sdg_hub/core/blocks/transform/text_concat.py +102 -0
sdg_hub/core/blocks/transform/uniform_col_val_setter.py +101 -0
sdg_hub/core/flow/__init__.py +20 -0
sdg_hub/core/flow/base.py +980 -0
sdg_hub/core/flow/metadata.py +344 -0
sdg_hub/core/flow/migration.py +187 -0
sdg_hub/core/flow/registry.py +330 -0
sdg_hub/core/flow/validation.py +265 -0
sdg_hub/{utils → core/utils}/__init__.py +6 -4
sdg_hub/{utils → core/utils}/datautils.py +1 -3
sdg_hub/core/utils/error_handling.py +208 -0
sdg_hub/{utils → core/utils}/path_resolution.py +2 -2
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +40 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +13 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +64 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +29 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +81 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +13 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +191 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +54 -0
sdg_hub-0.2.0.dist-info/METADATA +218 -0
sdg_hub-0.2.0.dist-info/RECORD +63 -0
sdg_hub/blocks/__init__.py +0 -42
sdg_hub/blocks/block.py +0 -96
sdg_hub/blocks/llmblock.py +0 -375
sdg_hub/blocks/openaichatblock.py +0 -556
sdg_hub/blocks/utilblocks.py +0 -597
sdg_hub/checkpointer.py +0 -139
sdg_hub/configs/annotations/cot_reflection.yaml +0 -34
sdg_hub/configs/annotations/detailed_annotations.yaml +0 -28
sdg_hub/configs/annotations/detailed_description.yaml +0 -10
sdg_hub/configs/annotations/detailed_description_icl.yaml +0 -32
sdg_hub/configs/annotations/simple_annotations.yaml +0 -9
sdg_hub/configs/knowledge/__init__.py +0 -0
sdg_hub/configs/knowledge/atomic_facts.yaml +0 -46
sdg_hub/configs/knowledge/auxilary_instructions.yaml +0 -35
sdg_hub/configs/knowledge/detailed_summary.yaml +0 -18
sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +0 -68
sdg_hub/configs/knowledge/evaluate_question.yaml +0 -38
sdg_hub/configs/knowledge/evaluate_relevancy.yaml +0 -84
sdg_hub/configs/knowledge/extractive_summary.yaml +0 -18
sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +0 -39
sdg_hub/configs/knowledge/generate_questions.yaml +0 -82
sdg_hub/configs/knowledge/generate_questions_responses.yaml +0 -56
sdg_hub/configs/knowledge/generate_responses.yaml +0 -86
sdg_hub/configs/knowledge/mcq_generation.yaml +0 -83
sdg_hub/configs/knowledge/router.yaml +0 -12
sdg_hub/configs/knowledge/simple_generate_qa.yaml +0 -34
sdg_hub/configs/reasoning/__init__.py +0 -0
sdg_hub/configs/reasoning/dynamic_cot.yaml +0 -40
sdg_hub/configs/skills/__init__.py +0 -0
sdg_hub/configs/skills/analyzer.yaml +0 -48
sdg_hub/configs/skills/annotation.yaml +0 -36
sdg_hub/configs/skills/contexts.yaml +0 -28
sdg_hub/configs/skills/critic.yaml +0 -60
sdg_hub/configs/skills/evaluate_freeform_pair.yaml +0 -111
sdg_hub/configs/skills/evaluate_freeform_questions.yaml +0 -78
sdg_hub/configs/skills/evaluate_grounded_pair.yaml +0 -119
sdg_hub/configs/skills/evaluate_grounded_questions.yaml +0 -51
sdg_hub/configs/skills/freeform_questions.yaml +0 -34
sdg_hub/configs/skills/freeform_responses.yaml +0 -39
sdg_hub/configs/skills/grounded_questions.yaml +0 -38
sdg_hub/configs/skills/grounded_responses.yaml +0 -59
sdg_hub/configs/skills/icl_examples/STEM.yaml +0 -56
sdg_hub/configs/skills/icl_examples/__init__.py +0 -0
sdg_hub/configs/skills/icl_examples/coding.yaml +0 -97
sdg_hub/configs/skills/icl_examples/extraction.yaml +0 -36
sdg_hub/configs/skills/icl_examples/humanities.yaml +0 -71
sdg_hub/configs/skills/icl_examples/math.yaml +0 -85
sdg_hub/configs/skills/icl_examples/reasoning.yaml +0 -30
sdg_hub/configs/skills/icl_examples/roleplay.yaml +0 -45
sdg_hub/configs/skills/icl_examples/writing.yaml +0 -80
sdg_hub/configs/skills/judge.yaml +0 -53
sdg_hub/configs/skills/planner.yaml +0 -67
sdg_hub/configs/skills/respond.yaml +0 -8
sdg_hub/configs/skills/revised_responder.yaml +0 -78
sdg_hub/configs/skills/router.yaml +0 -59
sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +0 -27
sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +0 -31
sdg_hub/flow.py +0 -477
sdg_hub/flow_runner.py +0 -450
sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +0 -13
sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +0 -12
sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +0 -89
sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +0 -148
sdg_hub/flows/generation/skills/improve_responses.yaml +0 -103
sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +0 -12
sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +0 -12
sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +0 -80
sdg_hub/flows/generation/skills/synth_skills.yaml +0 -59
sdg_hub/pipeline.py +0 -121
sdg_hub/prompts.py +0 -74
sdg_hub/registry.py +0 -122
sdg_hub/sdg.py +0 -206
sdg_hub/utils/config_validation.py +0 -91
sdg_hub/utils/error_handling.py +0 -94
sdg_hub/utils/validation_result.py +0 -10
sdg_hub-0.1.3.dist-info/METADATA +0 -190
sdg_hub-0.1.3.dist-info/RECORD +0 -89
sdg_hub/{logger_config.py → core/utils/logger_config.py} +1 -1
/sdg_hub/{configs/__init__.py → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md} +0 -0
/sdg_hub/{configs/annotations → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab}/__init__.py +0 -0
{sdg_hub-0.1.3.dist-info → sdg_hub-0.2.0.dist-info}/WHEEL +0 -0
{sdg_hub-0.1.3.dist-info → sdg_hub-0.2.0.dist-info}/licenses/LICENSE +0 -0
{sdg_hub-0.1.3.dist-info → sdg_hub-0.2.0.dist-info}/top_level.txt +0 -0

sdg_hub/sdg.py DELETED Viewed

@@ -1,206 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-"""Synthetic Data Generator (SDG) module for managing data generation flows."""
-# Standard
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import List, Optional, Tuple
-import traceback
-# Third Party
-from datasets import Dataset
-from tqdm import tqdm
-# Local
-from .checkpointer import Checkpointer
-from .flow import Flow
-from .logger_config import setup_logger
-from .utils.datautils import safe_concatenate_datasets
-logger = setup_logger(__name__)
-class SDG:
-    """Synthetic Data Generator class.
-    This class manages the generation of synthetic data using one or more
-    data generation flows.
-    Parameters
-    ----------
-    flows : List[Flow]
-        List of flows to execute.
-    num_workers : int, optional
-        Number of worker threads to use, by default 1
-    batch_size : Optional[int], optional
-        Size of batches to process, by default None
-    save_freq : Optional[int], optional
-        Frequency of checkpoint saves, by default None
-    Attributes
-    ----------
-    flows : List[Flow]
-        List of flows to execute.
-    num_workers : int
-        Number of worker threads to use.
-    batch_size : Optional[int]
-        Size of batches to process.
-    save_freq : Optional[int]
-        Frequency of checkpoint saves.
-    """
-    def __init__(
-        self,
-        flows: List[Flow],
-        num_workers: int = 1,
-        batch_size: Optional[int] = None,
-        save_freq: Optional[int] = None,
-    ) -> None:
-        self.flows = flows
-        self.num_workers = num_workers
-        self.batch_size = batch_size
-        self.save_freq = save_freq
-    def _split_dataset(
-        self, dataset: Dataset, batch_size: int
-    ) -> List[Tuple[int, int]]:
-        """Split the dataset into smaller batches.
-        Parameters
-        ----------
-        dataset : Dataset
-            The dataset to split.
-        batch_size : int
-            Size of each batch.
-        Returns
-        -------
-        List[Tuple[int, int]]
-            List of (start, end) indices for each batch.
-        """
-        total_size = len(dataset)
-        num_batches = (total_size + batch_size - 1) // batch_size
-        batches = [
-            (i * batch_size, min((i + 1) * batch_size, total_size))
-            for i in tqdm(range(num_batches))
-        ]
-        return batches
-    @staticmethod
-    def _generate_data(
-        flows: List[Flow],
-        input_split: Tuple[int, int],
-        ds: Dataset,
-        i: Optional[int] = None,
-    ) -> Optional[Dataset]:
-        """Generate data for a single split using the provided flows.
-        Parameters
-        ----------
-        flows : List[Flow]
-            List of flows to execute.
-        input_split : Tuple[int, int]
-            (start, end) indices for the current split.
-        ds : Dataset
-            The full input dataset.
-        i : Optional[int], optional
-            Split index for logging, by default None
-        Returns
-        -------
-        Optional[Dataset]
-            Generated dataset for the split, or None if generation failed.
-        """
-        logger.info(f"Processing split {i}")
-        input_split = ds.select(range(input_split[0], input_split[1]))
-        try:
-            for flow in flows:
-                input_split = flow.generate(input_split)
-            return input_split
-        except Exception as e:
-            logger.error(f"Error processing split {i}: {e}")
-            traceback.print_exc()
-            return None
-    def generate(
-        self, dataset: Dataset, checkpoint_dir: Optional[str] = None
-    ) -> Dataset:
-        """Generate synthetic data using the configured flows.
-        Parameters
-        ----------
-        dataset : Dataset
-            The input dataset to process.
-        checkpoint_dir : Optional[str], optional
-            Directory to save checkpoints, by default None
-        Returns
-        -------
-        Dataset
-            The generated dataset.
-        Notes
-        -----
-        If checkpoint_dir is provided, the generation process can be resumed
-        from the last checkpoint in case of interruption.
-        """
-        # Initialize checkpointer
-        checkpointer = Checkpointer(checkpoint_dir, self.save_freq)
-        # Load existing checkpoints and determine missing data
-        seed_data, pre_generated_data = checkpointer.load_existing_data(dataset)
-        # If all data has been generated, return the pre-generated data
-        if seed_data.num_rows == 0 and pre_generated_data is not None:
-            return pre_generated_data
-        if not self.batch_size:
-            # If batch size is not provided, generate the dataset in a single pass
-            generated_dataset = seed_data
-            # generated_data is initialized with seed_data, and it gets updated with each flow
-            for flow in self.flows:
-                generated_dataset = flow.generate(generated_dataset)
-            return generated_dataset
-        logger.info("Splitting the dataset into smaller batches")
-        input_splits = self._split_dataset(seed_data, self.batch_size)
-        logger.info(
-            f"Generating dataset with {len(input_splits)} splits, "
-            f"batch size {self.batch_size}, and {self.num_workers} workers"
-        )
-        generated_data = [pre_generated_data] if pre_generated_data else []
-        last_saved_split_index = 0  # To track the last saved split
-        with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
-            futures = [
-                executor.submit(
-                    self._generate_data, self.flows, input_split, seed_data, i
-                )
-                for i, input_split in enumerate(input_splits)
-            ]
-            for i, future in enumerate(tqdm(as_completed(futures), total=len(futures))):
-                generated_data_split = future.result()  # Ensure each future completes
-                if generated_data_split:
-                    generated_data.append(generated_data_split)
-                    logger.info(f"Finished future processing split {i} \n\n")
-                    # Use checkpointer to handle intermediate saves
-                    if checkpointer.should_save_checkpoint(i):
-                        # Save only the new splits since the last checkpoint
-                        new_splits = generated_data[last_saved_split_index : i + 1]
-                        checkpoint_dataset = safe_concatenate_datasets(new_splits)
-                        # check if checkpoint_dataset is not None
-                        if checkpoint_dataset:
-                            checkpointer.save_intermediate_checkpoint(
-                                checkpoint_dataset
-                            )
-                            last_saved_split_index = i + 1
-        generated_dataset = safe_concatenate_datasets(generated_data)
-        return generated_dataset

sdg_hub/utils/config_validation.py DELETED Viewed

@@ -1,91 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-"""Configuration validation utilities for SDG Hub.
-This module provides functions to validate configuration files used by blocks,
-ensuring they meet the required schema and contain all necessary fields.
-"""
-# Standard
-from typing import Any, Dict, List
-# Local
-from ..logger_config import setup_logger
-logger = setup_logger(__name__)
-def validate_prompt_config_schema(
-    config: Dict[str, Any], config_path: str
-) -> tuple[bool, List[str]]:
-    """Validate that a prompt configuration file has the required schema fields.
-    For prompt template configs, 'system' and 'generation' are required fields.
-    Other fields like 'introduction', 'principles', 'examples', 'start_tags', 'end_tags' are optional.
-    Parameters
-    ----------
-    config : Dict[str, Any]
-        The loaded configuration dictionary.
-    config_path : str
-        The path to the configuration file (for error reporting).
-    Returns
-    -------
-    tuple[bool, List[str]]
-        A tuple containing:
-        - bool: True if schema is valid, False otherwise
-        - List[str]: List of validation error messages (empty if valid)
-    """
-    required_fields = ["system", "generation"]
-    errors = []
-    # Ensure config is a dictionary
-    if not isinstance(config, dict):
-        errors.append(f"Configuration must be a dictionary, got {type(config).__name__}")
-        return False, errors
-    # Check for missing required fields
-    missing_fields = [field for field in required_fields if field not in config]
-    if missing_fields:
-        errors.append(f"Missing required fields: {missing_fields}")
-    # Check for empty or null required fields and validate they are strings
-    for field in required_fields:
-        if field in config:
-            value = config[field]
-            if value is None:
-                errors.append(f"Required field '{field}' is null")
-            elif not isinstance(value, str):
-                errors.append(f"Required field '{field}' must be a string, got {type(value).__name__}")
-            elif not value.strip():
-                errors.append(f"Required field '{field}' is empty")
-    # Check optional string fields are strings when present
-    string_fields = ["introduction", "principles", "examples"]
-    for field in string_fields:
-        if field in config:
-            value = config[field]
-            if value is not None and not isinstance(value, str):
-                errors.append(f"Field '{field}' must be a string, got {type(value).__name__}")
-    # Check start_tags and end_tags are lists of strings when present
-    tag_fields = ["start_tags", "end_tags"]
-    for field in tag_fields:
-        if field in config:
-            value = config[field]
-            if value is not None:
-                if not isinstance(value, list):
-                    errors.append(f"Field '{field}' must be a list, got {type(value).__name__}")
-                else:
-                    for i, tag in enumerate(value):
-                        if not isinstance(tag, str):
-                            errors.append(f"Field '{field}[{i}]' must be a string, got {type(tag).__name__}")
-    # Log validation results
-    if errors:
-        for error in errors:
-            logger.error(f"Config validation failed for {config_path}: {error}")
-        return False, errors
-    logger.debug(f"Config validation passed for {config_path}")
-    return True, []

sdg_hub/utils/error_handling.py DELETED Viewed

@@ -1,94 +0,0 @@
-"""Custom exception classes for SDG Hub error handling."""
-class SDGHubError(Exception):
-    """Base exception class for all SDG Hub errors."""
-    def __init__(self, message: str, details: str = None):
-        """Initialize SDGHubError.
-        Parameters
-        ----------
-        message : str
-            The main error message.
-        details : str, optional
-            Additional details about the error.
-        """
-        self.message = message
-        self.details = details
-        full_message = message
-        if details:
-            full_message = f"{message}\nDetails: {details}"
-        super().__init__(full_message)
-class FlowRunnerError(SDGHubError):
-    """Base exception class for flow runner errors."""
-    pass
-class DatasetLoadError(FlowRunnerError):
-    """Raised when dataset loading fails."""
-    pass
-class FlowConfigurationError(FlowRunnerError):
-    """Raised when flow configuration is invalid."""
-    pass
-class APIConnectionError(FlowRunnerError):
-    """Raised when API connection fails."""
-    pass
-class DataGenerationError(FlowRunnerError):
-    """Raised when data generation fails."""
-    pass
-class DataSaveError(FlowRunnerError):
-    """Raised when saving generated data fails."""
-    pass
-class BlockError(SDGHubError):
-    """Base exception class for block-related errors."""
-    pass
-class BlockConfigurationError(BlockError):
-    """Raised when block configuration is invalid."""
-    pass
-class BlockExecutionError(BlockError):
-    """Raised when block execution fails."""
-    pass
-class FlowError(SDGHubError):
-    """Base exception class for flow-related errors."""
-    pass
-class FlowValidationError(FlowError):
-    """Raised when flow validation fails."""
-    pass
-class FlowExecutionError(FlowError):
-    """Raised when flow execution fails."""
-    pass

sdg_hub/utils/validation_result.py DELETED Viewed

@@ -1,10 +0,0 @@
-from typing import List
-class ValidationResult:
-    def __init__(self, valid: bool, errors: List[str]):
-        self.valid = valid
-        self.errors = errors
-    def __repr__(self):
-        return f"ValidationResult(valid={self.valid}, errors={self.errors})"

sdg_hub-0.1.3.dist-info/METADATA DELETED Viewed

@@ -1,190 +0,0 @@
-Metadata-Version: 2.4
-Name: sdg_hub
-Version: 0.1.3
-Summary: Synthetic Data Generation
-Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
-License: Apache-2.0
-Project-URL: homepage, https://ai-innovation.team/
-Project-URL: source, https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub
-Project-URL: issues, https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/issues
-Classifier: Development Status :: 3 - Alpha
-Classifier: Environment :: Console
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: MacOS :: MacOS X
-Classifier: Operating System :: POSIX :: Linux
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: Implementation :: CPython
-Requires-Python: >=3.9
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: click<9.0.0,>=8.1.7
-Requires-Dist: datasets<4.0.0,>=2.18.0
-Requires-Dist: httpx<1.0.0,>=0.25.0
-Requires-Dist: jinja2
-Requires-Dist: openai<2.0.0,>=1.13.3
-Requires-Dist: rich
-Requires-Dist: tenacity!=8.4.0,>=8.3.0
-Requires-Dist: tqdm<5.0.0,>=4.66.2
-Provides-Extra: web-interface
-Requires-Dist: flask>=3.0.2; extra == "web-interface"
-Requires-Dist: pyyaml>=6.0.1; extra == "web-interface"
-Requires-Dist: flask-wtf>=1.2.2; extra == "web-interface"
-Provides-Extra: vllm
-Requires-Dist: vllm>=0.9.1; extra == "vllm"
-Requires-Dist: torch>=2.0.0; extra == "vllm"
-Requires-Dist: transformers>=4.37.0; extra == "vllm"
-Requires-Dist: accelerate>=0.21.0; extra == "vllm"
-Requires-Dist: xformers>=0.0.22.post7; extra == "vllm"
-Provides-Extra: examples
-Requires-Dist: tabulate>=0.9.0; extra == "examples"
-Requires-Dist: transformers>=4.37.0; extra == "examples"
-Requires-Dist: langchain-text-splitters; extra == "examples"
-Requires-Dist: docling>=2.3.0; extra == "examples"
-Provides-Extra: dev
-Requires-Dist: pre-commit<4.0,>=3.0.4; extra == "dev"
-Requires-Dist: pylint<4.0,>=2.16.2; extra == "dev"
-Requires-Dist: pylint-pydantic; extra == "dev"
-Requires-Dist: pytest; extra == "dev"
-Requires-Dist: pytest-asyncio; extra == "dev"
-Requires-Dist: pytest-cov; extra == "dev"
-Requires-Dist: pytest-html; extra == "dev"
-Requires-Dist: tox<5,>=4.4.2; extra == "dev"
-Dynamic: license-file
-# SDG Hub: Synthetic Data Generation Toolkit
-[![Build](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/pypi.yaml/badge.svg?branch=main)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/pypi.yaml)
-[![Release](https://img.shields.io/github/v/release/Red-Hat-AI-Innovation-Team/sdg_hub)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/releases)
-[![License](https://img.shields.io/github/license/Red-Hat-AI-Innovation-Team/sdg_hub)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/blob/main/LICENSE)
-[![Tests](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/test.yml/badge.svg)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/test.yml)
-[![codecov](https://codecov.io/gh/Red-Hat-AI-Innovation-Team/sdg_hub/graph/badge.svg?token=SP75BCXWO2)](https://codecov.io/gh/Red-Hat-AI-Innovation-Team/sdg_hub)
-<html>
-    <h3 align="center">
-      A modular, scalable, and efficient solution for creating synthetic data generation flows in a "low-code" manner.
-    </h3>
-    <h3 align="center">
-      <a href="http://ai-innovation.team/sdg_hub">Documentation</a> |
-      <a href="examples/">Examples</a> |
-      <a href="https://www.youtube.com/watch?v=aGKCViWjAmA">Video Tutorial</a>
-    </h3>
-</html>
-SDG Hub is designed to simplify data creation for LLMs, allowing users to chain computational units and build powerful flows for generating data and processing tasks. Define complex workflows using nothing but YAML configuration files.
-**📖 Full documentation available at: [https://ai-innovation.team/sdg_hub](https://ai-innovation.team/sdg_hub)**
----
-## ✨ Key Features
-- **Low-Code Flow Creation**: Build sophisticated data generation pipelines using
-  simple YAML configuration files without writing any code.
-- **Modular Block System**: Compose workflows from reusable, self-contained
-  blocks that handle LLM calls, data transformations, and filtering.
-- **LLM-Agnostic**: Works with any language model through configurable
-  prompt templates and generation parameters.
-- **Prompt Engineering Friendly**: Tune LLM behavior by editing declarative YAML prompts.
-## 🚀 Installation
-### Stable Release (Recommended)
-```bash
-pip install sdg-hub
-```
-### Development Version
-```bash
-pip install git+https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub.git
-```
-## 🏁 Quick Start
-### Prerequisites
-Before getting started, make sure you have:
-- Python 3.9 or higher
-- LLM Inference Endpoint exposed through OpenAI API
-### Simple Example
-Here's the simplest way to get started:
-```python
-from sdg_hub.flow_runner import run_flow
-# Run a basic knowledge generation flow
-run_flow(
-    ds_path="my_data.jsonl",
-    save_path="output.jsonl",
-    endpoint="http://0.0.0.0:8000/v1",
-    flow_path="flows/generation/knowledge/synth_knowledge.yaml"
-)
-```
-### Advanced Configuration
-You can invoke any built-in flow using run_flow:
-```python
-from sdg_hub.flow_runner import run_flow
-run_flow(
-    ds_path="path/to/dataset.jsonl",
-    save_path="path/to/output.jsonl",
-    endpoint="http://0.0.0.0:8000/v1",
-    flow_path="path/to/flow.yaml",
-    checkpoint_dir="path/to/checkpoints",
-    batch_size=8,
-    num_workers=32,
-    save_freq=2,
-)
-```
-### 📂 Available Built-in Flows
-You can start with any of these YAML flows out of the box:
-#### 🔎 **Knowledge Flows**
-| Flow | Description |
-|------|-------------|
-| `synth_knowledge.yaml` | Produces document-grounded questions and answers for factual memorization |
-| `synth_knowledge1.5.yaml` | Improved version that builds intermediate representations for better recall |
-#### 🧠 **Skills Flows**
-| Flow | Description |
-|------|-------------|
-| `synth_skills.yaml` | Freeform skills QA generation (e.g., "Create a new GitHub issue to add type hints") |
-| `synth_grounded_skills.yaml` | Domain-specific skill generation (e.g., "From the given conversation create a table for feature requests") |
-| `improve_responses.yaml` | Uses planning and critique-based refinement to improve generated answers |
-All these can be found here: [flows](src/sdg_hub/flows)
-## 📺 Video Tutorial
-For a comprehensive walkthrough of sdg_hub:
-[![SDG Hub Tutorial](https://img.youtube.com/vi/aGKCViWjAmA/0.jpg)](https://www.youtube.com/watch?v=aGKCViWjAmA)
-## 🤝 Contributing
-We welcome contributions from the community! Whether it's bug reports, feature requests, documentation improvements, or code contributions, please check out our [contribution guidelines](CONTRIBUTING.md).
-## 📄 License
-This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
----
-Built with ❤️ by the Red Hat AI Innovation Team

sdg_hub-0.1.3.dist-info/RECORD DELETED Viewed

@@ -1,89 +0,0 @@
-sdg_hub/__init__.py,sha256=5Wa6onDndPvG4iwnjq2jK747t3-7XKdQn2WfHfq1sFc,67
-sdg_hub/_version.py,sha256=NIzzV8ZM0W-CSLuEs1weG4zPrn_-8yr1AwwI1iuS6yo,511
-sdg_hub/checkpointer.py,sha256=R0pNKL_q7-BerxmIarY0w1nFYaq7fGnoRRkCVL6Z-Gw,5053
-sdg_hub/flow.py,sha256=14WDZfb-VDUBwXsVo9u5oMuWD6aOm-GWtIdT64z4j-0,18050
-sdg_hub/flow_runner.py,sha256=rSoXoN2n2vsMmOnsRImeQivsY9zlrDig53O9DBbQzz0,15177
-sdg_hub/logger_config.py,sha256=7uHEJVRfym1c4n95DOKHelLXqAus8uHsZYmzLsEjqpo,422
-sdg_hub/pipeline.py,sha256=mahktfoCMVnuBnvLNjAVOAoFKNQo-wb0Dz1_xdYhKDM,3852
-sdg_hub/prompts.py,sha256=Gto1KcIhO-50ERvZx1Qzu-eAhSlIkOjYH9F6j2eIPfY,17482
-sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sdg_hub/registry.py,sha256=Sc_HNxo4n0pgWMiEDd_sLjxaSXAMZFiHJIhQKqjywwk,3772
-sdg_hub/sdg.py,sha256=8SKrSnqyvJAwE2Muf9lXw9ONRcDzqmCtaEzFHCYW4CY,6914
-sdg_hub/blocks/__init__.py,sha256=I-kMjIM7E1NrPLyBuUi0yNoXnuw_kTK3A7ybyt3pOxU,936
-sdg_hub/blocks/block.py,sha256=zdeyDyYiY0EdD3xS7kZR2hRZCRkbygQ4WONp_zv3X7w,3051
-sdg_hub/blocks/llmblock.py,sha256=nWslPFZSCiyL7MXQurOk6Jx29UOsgnVDMI3PTwje7kg,13678
-sdg_hub/blocks/openaichatblock.py,sha256=BWsWFEozWptwe1MMaz-_ZmgQPsNbCRun6ZlaKD3ICxQ,20016
-sdg_hub/blocks/utilblocks.py,sha256=U2PQk26cwHOgofk5IenHjrao08gbqPFOBNRy5QJ-EEY,18290
-sdg_hub/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sdg_hub/configs/annotations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sdg_hub/configs/annotations/cot_reflection.yaml,sha256=60EdsTe1y7GoUIAWYSGfMa3EKI3oLZKCvDuKU7wHgQU,1737
-sdg_hub/configs/annotations/detailed_annotations.yaml,sha256=in21xmlhxDJGEaWh1IgINh33tEyW9AuyG3k4pWBuKSM,1520
-sdg_hub/configs/annotations/detailed_description.yaml,sha256=FsGbQMBxf1MAOi0nhrQ4icxcwYMlRura_ji9Pmeh1AA,192
-sdg_hub/configs/annotations/detailed_description_icl.yaml,sha256=NDdwo5EShnYZjm1Fn80sZTAwfnwpPigixP2hvJ8--cU,679
-sdg_hub/configs/annotations/simple_annotations.yaml,sha256=d80d0mK7Xz0MMCCSW3sYw3ztt5HASV5miu0krSAbjnA,234
-sdg_hub/configs/knowledge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sdg_hub/configs/knowledge/atomic_facts.yaml,sha256=bIfQr0q0FyReO94v_lpLO56FikARCvFmZza-ISZTOnA,2453
-sdg_hub/configs/knowledge/auxilary_instructions.yaml,sha256=aCgIjvNacdC2ZHThEvhZKvwORK6KqErVvVYQYQrIDLE,2034
-sdg_hub/configs/knowledge/detailed_summary.yaml,sha256=_Mc_i9vaLp1OPKexSOURV5gbXEG41p1eELUukOhz8oM,388
-sdg_hub/configs/knowledge/evaluate_faithfulness.yaml,sha256=iuvx5vNNm_jzHlmcKF83StaDYezRz2vQn3JUHM-TMdQ,3054
-sdg_hub/configs/knowledge/evaluate_question.yaml,sha256=02mikEAJCUEkREBo7KxPY9H6iTUHQN-4cRkn2XMlVQ8,1915
-sdg_hub/configs/knowledge/evaluate_relevancy.yaml,sha256=yPyW2BeLV07cvDU8NO6f-Wc32P9iycnpXyLvvTnUy44,3651
-sdg_hub/configs/knowledge/extractive_summary.yaml,sha256=TYgJ7WQc7NFkf3GeRsbx6lwfA_xFnEOYGELewSqorp0,399
-sdg_hub/configs/knowledge/generate_code_questions_responses.yaml,sha256=cIus2JYMYDvxHFVSU9QVa-1IK5KoChb3rCU2b4b9UmI,908
-sdg_hub/configs/knowledge/generate_questions.yaml,sha256=iJtttZrVvlXFraUSrMowqTCLoJOLDbBndcTNMPTO8A4,2788
-sdg_hub/configs/knowledge/generate_questions_responses.yaml,sha256=H9nb_5xGP7k6HtC3VboXqpiI5kQ9Xp3vjhXH3YIFesk,2525
-sdg_hub/configs/knowledge/generate_responses.yaml,sha256=wwiB7lSB9yEB1XG2SIEIRtHkSlKh3NGJAmDaq2J6-ZY,2483
-sdg_hub/configs/knowledge/mcq_generation.yaml,sha256=d4VKegnVIexwCn0e2AJs-0DC6XdLyUBGaCsQVwzICUE,3152
-sdg_hub/configs/knowledge/router.yaml,sha256=9m_cX3xl808Vwrcq2PACyX45QFPkrV2nVYIY8x10JBU,119
-sdg_hub/configs/knowledge/simple_generate_qa.yaml,sha256=OsuZP9SxQeUhTsHdhUO10mnjJ1u_6xekW5IQucFpRco,1565
-sdg_hub/configs/reasoning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sdg_hub/configs/reasoning/dynamic_cot.yaml,sha256=6XY_mFpB_oKFQ7U2CmHTqkJRGVHgOvpNmIDfhksYW6o,2641
-sdg_hub/configs/skills/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sdg_hub/configs/skills/analyzer.yaml,sha256=QBtyjaU6HBZqzNOmev_W4_scn_hH7Rfxd2xL_LcPLho,2261
-sdg_hub/configs/skills/annotation.yaml,sha256=k5nJ357kUr0Uvq7Hkt3Ey22UbgSjgSjIomjHFfjaQnY,916
-sdg_hub/configs/skills/contexts.yaml,sha256=MZ2QpuGhTce6kuEsMleaGblljhGG-yhXBuH42htA2P4,1161
-sdg_hub/configs/skills/critic.yaml,sha256=Dr7anOKa7Xx1oDonXzsCfXwKIl4hUTArx2Sb_rgpLQI,1808
-sdg_hub/configs/skills/evaluate_freeform_pair.yaml,sha256=MOI0-GyKrJ_O4v1mm8A1lIKxXfwcS3dA7GjlpDEuXRU,4055
-sdg_hub/configs/skills/evaluate_freeform_questions.yaml,sha256=yDmLd-3A9pN5VLaT4lAcJ_ZvCY43LYlcS1KEdxpBRjU,2559
-sdg_hub/configs/skills/evaluate_grounded_pair.yaml,sha256=vMQtsHpNxPOOHnkzqWPp-N1gSfwPqTbfcKmNfhb9WS8,4648
-sdg_hub/configs/skills/evaluate_grounded_questions.yaml,sha256=9yr97azFhMdOfYp11BFtDSIhhP4wjQMOxYZnKWKlCPU,3115
-sdg_hub/configs/skills/freeform_questions.yaml,sha256=N6R3c1jNiSSw6T-OUJULpLnPHuaSXjvoNjSqTKL6EOY,1500
-sdg_hub/configs/skills/freeform_responses.yaml,sha256=4URTMsPpgSDOVj71Gw3lL82QWnUFR37iE72BIMwwv7c,1544
-sdg_hub/configs/skills/grounded_questions.yaml,sha256=t6pKjt5Fp_ThZueB7JBrUKuQLQY_At-Y9O67OtrIXMo,1898
-sdg_hub/configs/skills/grounded_responses.yaml,sha256=kVOeBp3BjKCFKG2qConXIQVVPI1EgcKJgKn6DFAkl1s,1860
-sdg_hub/configs/skills/judge.yaml,sha256=FxnJA_wdmyMyMqGEZDAT8hc2itO845mGDNXgpmV2EUU,3203
-sdg_hub/configs/skills/planner.yaml,sha256=yNF6t0EnmwYt1EV9Y3-vkmPcbOQRtvoLr8MITuiUw_A,2086
-sdg_hub/configs/skills/respond.yaml,sha256=K1Q5X5_Q1k60hNDbHDjMYBzxbyOIEEHTQcXW6qQ4Ve0,108
-sdg_hub/configs/skills/revised_responder.yaml,sha256=rjypOJbhZV9PuOD9YhlYgymxOJV8Zdzzz54x6Fxn2bY,2875
-sdg_hub/configs/skills/router.yaml,sha256=7YnFp6H5wYD8W5Qn1Ac4r9dGBSFUDhZSNwmglQ99PgQ,3545
-sdg_hub/configs/skills/simple_generate_qa_freeform.yaml,sha256=j8cJtEKSvtA__rE08iU6oz2XnfIgj0HiLVL8-6RhK3c,1431
-sdg_hub/configs/skills/simple_generate_qa_grounded.yaml,sha256=tvX9EN5TArFesOOqpdN3hb-IHe7O82a2twQd-gzyCgw,1500
-sdg_hub/configs/skills/icl_examples/STEM.yaml,sha256=5dcLC5jXOEeDasBkTunnHYrlddI3HcHYnEAXZcrd0ds,8412
-sdg_hub/configs/skills/icl_examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sdg_hub/configs/skills/icl_examples/coding.yaml,sha256=a5m-pUcV9xUb54gQ5U3vsU1RBXzOmsfX0CjTW7U62zo,5240
-sdg_hub/configs/skills/icl_examples/extraction.yaml,sha256=P751l6NvFRkINWz-bX5jgnd_if2bl3d_NlhGI7g81xw,4654
-sdg_hub/configs/skills/icl_examples/humanities.yaml,sha256=tZyiJ4Q3gG4uuoDXw6g__lX3ySEUaRZW2GhW1ustwaM,11370
-sdg_hub/configs/skills/icl_examples/math.yaml,sha256=hNq-QudlXrg9CWLpJdrZ4v3vifGTWhyp2gcfwPdR3_o,6776
-sdg_hub/configs/skills/icl_examples/reasoning.yaml,sha256=eesIlH9SO07TVF20gy18MZrcDzLhSmynd_F_lvg0oQg,4335
-sdg_hub/configs/skills/icl_examples/roleplay.yaml,sha256=LYEyA7wv7QWQscUNQr0K_lotNoWSfuoAEncx3PCRYIs,6997
-sdg_hub/configs/skills/icl_examples/writing.yaml,sha256=El-57IjZ5IvdcmCHyHvX_M2RFFkEos572220be8ecrQ,11335
-sdg_hub/flows/generation/knowledge/mmlu_bench.yaml,sha256=U0S2NPkZ_9_8yQGgHJm4el-wVsg_6MllzbFT97cGNrI,343
-sdg_hub/flows/generation/knowledge/simple_knowledge.yaml,sha256=_DkBZjS47bH0Lmu0eXVRlesTxeAF8Zlzj1PgR1vruuA,295
-sdg_hub/flows/generation/knowledge/synth_knowledge.yaml,sha256=sYBzIFNBGks_o2Nwvov5MSrMadAB3g-niBAaWPbBYO0,2160
-sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml,sha256=Ao91pCtPmyJts0_aLDkl7n3q14ndvzN_nNIm5Q0RnMI,3610
-sdg_hub/flows/generation/skills/improve_responses.yaml,sha256=wUV0awTmKHNZ62pHiw_yz-IdG0OYgT_dCwlMUlZS3TA,2683
-sdg_hub/flows/generation/skills/simple_freeform_skill.yaml,sha256=iVEomFH1E52JA7KLmTIwkS1PnzxUJVPMgbK2O-m80As,309
-sdg_hub/flows/generation/skills/simple_grounded_skill.yaml,sha256=LTLxqdgbLIKSJonuIRHhcRSpit1EawwNvytWzXWXe2E,309
-sdg_hub/flows/generation/skills/synth_grounded_skills.yaml,sha256=91Dm--agpmbm02hIVnFhEndjppKsQEWXDbckR9GAzKM,2045
-sdg_hub/flows/generation/skills/synth_skills.yaml,sha256=9lhQcxXXbN4V9ztPph4fyjUtctll2FYtKY-V4grQdy4,1492
-sdg_hub/utils/__init__.py,sha256=Jfs1DAVSYDNn8dfs0Uq2MguSwu77NyhP-KufSJICiBQ,278
-sdg_hub/utils/config_validation.py,sha256=g92GxN73Mjr0cXvc5amB_Fn4iV9-iKeWmPz9HwLPmNY,3426
-sdg_hub/utils/datautils.py,sha256=0t_SZ_UXBKl8uL6rVp3SUh8YKRbzKlh2oO5gr2cKyEw,389
-sdg_hub/utils/error_handling.py,sha256=UvPEmtdpbBL71Zx8DWpIqd8869kEY2dlCH11iDgMfec,1847
-sdg_hub/utils/path_resolution.py,sha256=M7hnwoyRQTKgwGC3Ld1_KmKaO_8Lu0PCk6JtQrLp67Q,2006
-sdg_hub/utils/validation_result.py,sha256=O3zF6r49LQ9StAf_oWmK2bg-JfTQw6rpbHtHr9lI4ks,264
-sdg_hub-0.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sdg_hub-0.1.3.dist-info/METADATA,sha256=v8k82qCPIhwhS_rBAe8S3SXTl_xu7UBAoi6NB3vzT3s,7240
-sdg_hub-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sdg_hub-0.1.3.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
-sdg_hub-0.1.3.dist-info/RECORD,,

sdg-hub 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

sdg-hub 0.1.3py3-none-any.whl → 0.2.0py3-none-any.whl