sdg-hub 0.1.0a3__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
Files changed (59)
  1. sdg_hub/_version.py +2 -2
  2. sdg_hub/blocks/__init__.py +35 -5
  3. sdg_hub/blocks/block.py +58 -16
  4. sdg_hub/blocks/llmblock.py +149 -204
  5. sdg_hub/blocks/utilblocks.py +500 -43
  6. sdg_hub/checkpointer.py +139 -0
  7. sdg_hub/configs/annotations/detailed_annotations.yaml +28 -0
  8. sdg_hub/configs/annotations/simple_annotations.yaml +9 -0
  9. sdg_hub/configs/knowledge/atomic_facts.yaml +1 -0
  10. sdg_hub/configs/knowledge/detailed_summary.yaml +1 -0
  11. sdg_hub/configs/knowledge/extractive_summary.yaml +1 -0
  12. sdg_hub/configs/knowledge/generate_questions.yaml +82 -0
  13. sdg_hub/configs/knowledge/generate_responses.yaml +86 -0
  14. sdg_hub/configs/skills/contexts.yaml +18 -11
  15. sdg_hub/configs/skills/evaluate_freeform_pair.yaml +79 -12
  16. sdg_hub/configs/skills/evaluate_freeform_questions.yaml +60 -28
  17. sdg_hub/configs/skills/evaluate_grounded_pair.yaml +95 -30
  18. sdg_hub/configs/skills/freeform_questions.yaml +21 -16
  19. sdg_hub/configs/skills/freeform_responses.yaml +19 -25
  20. sdg_hub/configs/skills/router.yaml +53 -6
  21. sdg_hub/flow.py +351 -21
  22. sdg_hub/flow_runner.py +216 -0
  23. sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +26 -9
  24. sdg_hub/flows/generation/skills/{agentic_improve_skill.yaml → improve_responses.yaml} +26 -31
  25. sdg_hub/flows/generation/skills/synth_skills.yaml +4 -4
  26. sdg_hub/pipeline.py +67 -12
  27. sdg_hub/prompts.py +26 -0
  28. sdg_hub/sdg.py +128 -86
  29. sdg_hub/utils/config_validation.py +91 -0
  30. sdg_hub/utils/validation_result.py +10 -0
  31. sdg_hub-0.1.1.dist-info/METADATA +190 -0
  32. sdg_hub-0.1.1.dist-info/RECORD +86 -0
  33. {sdg_hub-0.1.0a3.dist-info → sdg_hub-0.1.1.dist-info}/WHEEL +1 -1
  34. sdg_hub/blocks/filterblock.py +0 -76
  35. sdg_hub/blocks/iterblock.py +0 -31
  36. sdg_hub/blocks/rmblocks.py +0 -194
  37. sdg_hub/configs/annotations/simple.yaml +0 -10
  38. sdg_hub/configs/knowledge/data_recipe/default_recipe.yaml +0 -3
  39. sdg_hub/configs/skills/data_recipe/default_recipe.yaml +0 -6
  40. sdg_hub/flows/annotation/emotion/detailed_description.yaml +0 -19
  41. sdg_hub/flows/annotation/emotion/detailed_description_icl.yaml +0 -19
  42. sdg_hub/flows/annotation/emotion/simple.yaml +0 -19
  43. sdg_hub/utils/chunking.py +0 -73
  44. sdg_hub/utils/docprocessor.py +0 -357
  45. sdg_hub/utils/parse_and_convert.py +0 -392
  46. sdg_hub-0.1.0a3.dist-info/METADATA +0 -154
  47. sdg_hub-0.1.0a3.dist-info/RECORD +0 -90
  48. /sdg_hub/configs/{knowledge/data_recipe → reasoning}/__init__.py +0 -0
  49. /sdg_hub/configs/skills/{_G_.yaml → icl_examples/STEM.yaml} +0 -0
  50. /sdg_hub/configs/skills/{data_recipe → icl_examples}/__init__.py +0 -0
  51. /sdg_hub/configs/skills/{_A_.yaml → icl_examples/coding.yaml} +0 -0
  52. /sdg_hub/configs/skills/{_B_.yaml → icl_examples/extraction.yaml} +0 -0
  53. /sdg_hub/configs/skills/{_C_.yaml → icl_examples/humanities.yaml} +0 -0
  54. /sdg_hub/configs/skills/{_D_.yaml → icl_examples/math.yaml} +0 -0
  55. /sdg_hub/configs/skills/{_E_.yaml → icl_examples/reasoning.yaml} +0 -0
  56. /sdg_hub/configs/skills/{_F_.yaml → icl_examples/roleplay.yaml} +0 -0
  57. /sdg_hub/configs/skills/{_H_.yaml → icl_examples/writing.yaml} +0 -0
  58. {sdg_hub-0.1.0a3.dist-info → sdg_hub-0.1.1.dist-info}/licenses/LICENSE +0 -0
  59. {sdg_hub-0.1.0a3.dist-info → sdg_hub-0.1.1.dist-info}/top_level.txt +0 -0
@@ -1,392 +0,0 @@
- # SPDX-License-Identifier: Apache-2.0
-
- # Standard
- from enum import Enum
- from typing import Any
- import json
- import os
- import random
- import re
- import uuid
-
- # Third Party
- from datasets import Dataset
- import yaml
-
- # First Party
- # pylint: disable=ungrouped-imports
- from sdg_hub import utils
- from sdg_hub.logger_config import setup_logger
- from .datautils import safe_concatenate_datasets
-
- logger = setup_logger(__name__)
-
-
- class TaxonomyType(Enum):
-     KNOWLEDGE = "knowledge"
-     SKILL = "skill"
-
-
- def _unescape(s):
-     return bytes(s, "utf-8").decode("utf-8").strip()
-
-
- # This is a hack because the simple workflow returns a q/a pair as a single output.
- # We could possibly try to ask for them separately, but it would cost twice the inference
- # API calls. All of this is because the smallest models we use on small environments
- # for testing and demos weren't good enough to follow the strict formatting instructions used
- # in the full pipeline.
- def _get_question(synth_example: dict):
-     if "question" in synth_example:
-         return synth_example["question"]
-
-     if not synth_example.get("output"):
-         raise utils.GenerateException(
-             f"Error: output not found in synth_example: {synth_example}"
-         )
-
-     parts = synth_example["output"].split("?", 1)
-     if len(parts) != 2:
-         logger.warning(f"Failed to split generated q&a: {synth_example['output']}")
-     return parts[0].strip() + "?" if len(parts) == 2 else ""
-
-
- # This is also a hack. See the comment above _get_question.
- def _get_response(synth_example: dict):
-     if "response" in synth_example:
-         return synth_example["response"]
-
-     if "output" not in synth_example:
-         raise utils.GenerateException(
-             f"Error: output not found in synth_example: {synth_example}"
-         )
-
-     parts = synth_example["output"].split("?", 1)
-     if len(parts) != 2:
-         logger.warning(f"Failed to split generated q&a: {synth_example['output']}")
-     return parts[1].strip() if len(parts) == 2 else parts[0].strip()
-
-
- def _convert_to_hack_fmt(sample: dict, sys_prompt: str):
-     """
-     Convert a sample dictionary to contain 'system', 'user', and 'assistant' columns.
-
-     Note: We should remove this function in the future when we resolve this issue and
-     standardize the format to messages.
-     """
-     # Create user query message
-     user_query = _unescape(_get_question(sample))
-     response = _unescape(_get_response(sample))
-     if "context" in sample:
-         user_query = f"{sample['context']}\n\n{user_query}"
-
-     sample["id"] = str(uuid.uuid4())
-     sample["system"] = sys_prompt
-     sample["user"] = user_query
-     sample["assistant"] = response
-
-     return sample
-
-
- def _convert_to_messages(sample: dict, sys_prompt: str):
-     """
-     Convert a sample dictionary to contain 'messages'
-     and 'metadata' columns required for training.
-     """
-     # Create user query message
-     user_query = _unescape(_get_question(sample))
-     response = _unescape(_get_response(sample))
-
-     sample["id"] = str(uuid.uuid4())
-     sample["messages"] = [
-         {"content": sys_prompt, "role": "system"},
-         {"content": user_query, "role": "user"},
-         {"content": response, "role": "assistant"},
-     ]
-
-     return sample
-
-
- def create_auxiliary_dataset(generated_dataset: Dataset):
-     if "dataset_type" not in generated_dataset.column_names:
-         return None
-
-     # get module path of the current file
-     module_dir = os.path.dirname(os.path.abspath(__file__))
-     aux_inst_path = os.path.join(module_dir, "../configs/knowledge/auxilary_instructions.yaml")
-     if os.path.isfile(aux_inst_path):
-         with open(aux_inst_path, "r", encoding="utf-8") as fp:
-             auxiliary_inst = yaml.safe_load(fp)
-     else:
-         logger.error(f"auxiliary instructions file not found at {aux_inst_path}")
-         return None
-     auxiliary_ds = generated_dataset.filter(
-         lambda x: x["dataset_type"] != "base_document"
-     )
-     unique_document_auxiliary = auxiliary_ds.to_pandas().drop_duplicates(
-         subset=["document"]
-     )
-     unique_document_auxiliary = Dataset.from_pandas(unique_document_auxiliary)
-     unique_document_auxiliary = unique_document_auxiliary.remove_columns(
-         [
-             col
-             for col in unique_document_auxiliary.column_names
-             if col
-             not in [
-                 "raw_document",
-                 "document_outline",
-                 "domain",
-                 "dataset_type",
-                 "document",
-             ]
-         ]
-     )
-     unique_document_auxiliary = unique_document_auxiliary.rename_columns(
-         {"raw_document": "context", "document": "response"}
-     )
-
-     def __create_auxiliary_ds(rec):
-         instruction = random.choice(auxiliary_inst[rec["dataset_type"]])
-         messages = [
-             {"role": "user", "content": f"{rec['context']}\n\n{instruction}"},
-             {"role": "assistant", "content": rec["response"]},
-         ]
-         metadata = json.dumps(
-             {
-                 "dataset_type": rec["dataset_type"],
-                 "raw_document": rec["context"],
-                 "dataset": f"document_{rec['dataset_type']}",
-                 "domain": rec["domain"],
-             }
-         )
-         return {"messages": messages, "metadata": metadata, "id": str(uuid.uuid4())}
-
-     unique_document_auxiliary = unique_document_auxiliary.map(
-         __create_auxiliary_ds, remove_columns=unique_document_auxiliary.column_names
-     )
-     return unique_document_auxiliary
-
-
- def generate_knowledge_qa_dataset(
-     generated_dataset: Dataset, keep_context_separate=False
- ):
-     def __create_qa_row(rec):
-         context = rec["document"]
-         instruction = rec["question"]
-         response = rec["response"]
-         metadata = {
-             "sdg_document": rec["document"],
-             "domain": rec["domain"],
-             "dataset": "document_knowledge_qa",
-         }
-         if "raw_document" in rec and "dataset_type" in rec:
-             metadata.update(
-                 {
-                     "raw_document": rec["raw_document"],
-                     "dataset_type": rec["dataset_type"],
-                 }
-             )
-         metadata = json.dumps(metadata)
-         if keep_context_separate:
-             messages = [
-                 {"role": "user", "content": f"{instruction}"},
-                 {"role": "assistant", "content": response},
-             ]
-             return {
-                 "messages": messages,
-                 "metadata": metadata,
-                 "id": str(uuid.uuid4()),
-                 "context": context,
-             }
-         else:
-             messages = [
-                 {"role": "user", "content": f"{context}\n\n{instruction}"},
-                 {"role": "assistant", "content": response},
-             ]
-
-             return {"messages": messages, "metadata": metadata, "id": str(uuid.uuid4())}
-
-     knowledge_ds = generated_dataset.map(
-         __create_qa_row, remove_columns=generated_dataset.column_names
-     )
-     return knowledge_ds
-
-
- def build_raft_dataset(ds: Dataset, p, num_doc_in_context=4):
-     all_context = list(set(ds["context"]))
-
-     def _pick_documents(rec, p):
-         answer_document = rec["context"]
-         selected_docs = [e for e in all_context if e != answer_document]
-         if len(selected_docs) > 0:
-             if len(selected_docs) < num_doc_in_context:
-                 logger.info(
-                     f"Number of unique documents is {len(selected_docs)}, which is less than {num_doc_in_context}. Using all the documents in the RAFT context."
-                 )
-             if random.uniform(0, 1) < p:
-                 # golden/answer + distractor documents
-                 docs = (
-                     random.sample(selected_docs, k=num_doc_in_context - 1) + [answer_document]
-                     if len(selected_docs) >= (num_doc_in_context - 1)
-                     else selected_docs + [answer_document]
-                 )
-             else:
-                 # distractor documents
-                 docs = (
-                     random.sample(selected_docs, k=num_doc_in_context)
-                     if len(selected_docs) >= num_doc_in_context
-                     else selected_docs
-                 )
-         else:
-             logger.info("Only 1 unique document found. Turning off RAFT styling")
-             docs = [answer_document]
-
-         random.shuffle(docs)
-
-         docs = "\n".join(f"Document:\n{e}\n\n" for e in docs)
-         user_idx, user_msg = [
-             (idx, rec_msg)
-             for idx, rec_msg in enumerate(rec["messages"])
-             if rec_msg["role"] == "user"
-         ][0]
-         user_inst = user_msg["content"]
-         rec["messages"][user_idx]["content"] = f"{docs}\n\n{user_inst}"
-         metadata = json.loads(rec["metadata"])
-         metadata["dataset"] += f"_raft_p{p}"
-         rec["metadata"] = json.dumps(metadata)
-         return rec
-
-     ds = ds.map(_pick_documents, fn_kwargs={"p": p}, remove_columns=["context"])
-     return ds
-
-
- def _conv_pretrain(rec):
-     rec["messages"] = [
-         {
-             "role": "pretraining",
-             "content": f"<|user|>\n{rec['messages'][0]['content']}\n<|assistant|>\n{rec['messages'][1]['content']}",
-         }
-     ]
-     return rec
-
-
- def create_knowledge_regular_ds(generated_dataset: Dataset):
-     # Phase 1.0
-     knowledge_ds = generate_knowledge_qa_dataset(
-         generated_dataset, keep_context_separate=True
-     )
-     knowledge_ds = build_raft_dataset(knowledge_ds, p=0.4)
-
-     auxiliary_dataset = create_auxiliary_dataset(generated_dataset)
-     if auxiliary_dataset is not None:
-         transformed_data = safe_concatenate_datasets([knowledge_ds, auxiliary_dataset])
-     else:
-         transformed_data = knowledge_ds
-     return transformed_data
-
-
- def create_knowledge_pretraining_ds(generated_dataset: Dataset):
-     # Phase 0.7
-     knowledge_ds = generate_knowledge_qa_dataset(
-         generated_dataset, keep_context_separate=False
-     )
-     knowledge_ds = knowledge_ds.map(_conv_pretrain)
-
-     auxiliary_dataset = create_auxiliary_dataset(generated_dataset)
-     if auxiliary_dataset is not None:
-         auxiliary_dataset = auxiliary_dataset.map(_conv_pretrain)
-         transformed_data = safe_concatenate_datasets([knowledge_ds, auxiliary_dataset])
-     else:
-         transformed_data = knowledge_ds
-     return transformed_data
-
-
- def post_process_mcq(ds: Dataset, is_mmlu_eval: bool = False) -> Dataset:
-     """Filter out badly generated data and add a dataset type column.
-
-     Args:
-         ds (Dataset): MCQ dataset generated by the mmlu pipeline
-         is_mmlu_eval (bool, optional): whether to format the dataset for MMLU-style evaluation. Defaults to False.
-
-     Returns:
-         Dataset: filtered HF dataset with the new column
-     """
-     ds = ds.filter(lambda x: ")" in x["mmlubench_answer"])
-     ds = ds.filter(lambda x: "A)" in x["mmlubench_question"])
-     ds = ds.add_column("dataset_type", ["mcq_qa"] * ds.num_rows)
-     if is_mmlu_eval:
-         return format_mmlu_style(ds)
-     return ds
-
-
- def extract_options(text: str) -> list[Any]:
-     """Extract MCQ options from a question with a regex.
-
-     Args:
-         text (str): question with options/mcq choices
-
-     Returns:
-         list[Any]: options under the question that match the pattern.
-     """
-     # Use a regular expression to find patterns and capture the text after the letter and parenthesis
-     pattern = r"\b[A-Z]\) (.+)"
-     matches = re.findall(pattern, text)
-     return matches
-
-
- def format_mmlu_style(ds: Dataset) -> Dataset:
-     """Format the dataset according to the lm-harness MMLU requirements.
-
-     Args:
-         ds (Dataset): input dataset
-
-     Returns:
-         Dataset: formatted HF dataset
-     """
-     ds = ds.map(
-         lambda x: {"answer": x["mmlubench_answer"][: x["mmlubench_answer"].index(")")]}
-     )
-     ds = ds.map(lambda x: {"choices": extract_options(x["mmlubench_question"])})
-     ds = ds.map(
-         lambda x: {
-             "question": x["mmlubench_question"][
-                 : x["mmlubench_question"].index("A)")
-             ].strip()
-         }
-     )
-     ds = ds.rename_columns({"domain": "subject"})
-     ds = ds.filter(lambda x: x["choices"])
-     ds = ds.filter(lambda x: len(x["choices"]) == 4)
-     ds = ds.filter(lambda x: x["answer"] in ["A", "B", "C", "D"])
-     ds = ds.class_encode_column("answer")
-     return ds
-
-
- def create_mmlu_evaluation_dataset(generate_mcq_dataset: Dataset) -> Dataset:
-     """Filter and format an MCQ dataset so it is compatible with lm-harness MMLU-style evaluation.
-
-     Args:
-         generate_mcq_dataset (Dataset): SDG-generated MCQ dataset
-
-     Returns:
-         Dataset: MMLU MCQ dataset
-     """
-     mmlu_dataset = post_process_mcq(generate_mcq_dataset, is_mmlu_eval=True)
-     return mmlu_dataset
-
-
- def create_mmlu_evaluation_yaml(task_name, eval_data_file_path, yaml_file_path):
-     """Prepare the task YAML used by lm-eval-harness to evaluate knowledge with an MMLU-style metric."""
-     task_yaml = {
-         "task": task_name,
-         "dataset_kwargs": {"data_files": {"test": eval_data_file_path}},
-         "include": "_default_mmlu_pr_template_yaml",
-         "group": "mmlu_pr",
-     }
-     with open(yaml_file_path, "w", encoding="utf-8") as yaml_file:
-         yaml.dump(task_yaml, yaml_file, default_flow_style=False)
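
For reference, the question/answer recovery hack deleted above reduces to splitting a single generated string on its first "?". A minimal sketch of that heuristic (`split_qa` is an illustrative name, not part of the package):

```python
def split_qa(output: str) -> tuple[str, str]:
    """Split a combined q/a string into (question, response) on the first '?'."""
    parts = output.split("?", 1)
    if len(parts) != 2:
        # Mirrors the removed helpers' fallback when no '?' is present:
        # _get_question returned "" and _get_response returned the whole text.
        return "", parts[0].strip()
    return parts[0].strip() + "?", parts[1].strip()

print(split_qa("What is SDG? A framework for synthetic data generation."))
# -> ('What is SDG?', 'A framework for synthetic data generation.')
```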
@@ -1,154 +0,0 @@
- Metadata-Version: 2.4
- Name: sdg_hub
- Version: 0.1.0a3
- Summary: Synthetic Data Generation
- Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
- License: Apache-2.0
- Project-URL: homepage, https://ai-innovation.team/
- Project-URL: source, https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub
- Project-URL: issues, https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/issues
- Classifier: Development Status :: 3 - Alpha
- Classifier: Environment :: Console
- Classifier: License :: OSI Approved :: Apache Software License
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: MacOS :: MacOS X
- Classifier: Operating System :: POSIX :: Linux
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Classifier: Programming Language :: Python :: Implementation :: CPython
- Requires-Python: >=3.9
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: click<9.0.0,>=8.1.7
- Requires-Dist: datasets<4.0.0,>=2.18.0
- Requires-Dist: httpx<1.0.0,>=0.25.0
- Requires-Dist: jinja2
- Requires-Dist: langchain-text-splitters
- Requires-Dist: openai<2.0.0,>=1.13.3
- Requires-Dist: rich
- Requires-Dist: tenacity!=8.4.0,>=8.3.0
- Requires-Dist: tqdm<5.0.0,>=4.66.2
- Dynamic: license-file
-
- # Synthetic Data Generation for LLMs
-
- The SDG Framework is a modular, scalable, and efficient solution for creating synthetic data generation workflows in a "no-code" manner. At its core, this framework is designed to simplify data creation for LLMs, allowing users to chain computational units and build powerful pipelines for generating data and processing tasks.
-
-
-
- ## Core Design Principles
-
- The framework is built around the following principles:
-
- 1. **Modular Design**: Highly composable blocks form the building units of the framework, allowing users to build workflows effortlessly.
- 2. **No-Code Workflow Creation**: Specify workflows using simple YAML configuration files.
- 3. **Scalability and Performance**: Optimized for handling large-scale workflows with millions of records.
-
- ---
-
- ## Framework Architecture
-
- ![overview](assets/imgs/overview.png)
-
- ### Blocks: The Fundamental Unit
-
- At the heart of the framework is the **Block**. Each block is a self-contained computational unit that performs specific tasks, such as:
-
- - Making LLM calls
- - Performing data transformations
- - Applying filters
-
- Blocks are designed to be:
- - **Modular**: Reusable across multiple pipelines.
- - **Composable**: Easily chained together to create workflows.
-
- These blocks are implemented in the [src/sdg_hub/blocks](src/sdg_hub/blocks) directory.
-
- ### Pipelines: Higher-Level Abstraction
-
- Blocks can be chained together to form a **Pipeline**. Pipelines enable:
- - Linear or recursive chaining of blocks.
- - Execution of complex workflows by chaining multiple pipelines together.
-
- ### SDG Workflow: Full Workflow Automation
-
- Pipelines are further orchestrated into **SDG Workflows**, enabling seamless end-to-end processing. Invoking `sdg_hub.generate` triggers one or more pipelines that process data through all the configured blocks.
-
- ---
-
- ### YAML-Based Workflow: The Flow
-
- The YAML configuration file, known as the **Flow**, is central to defining data generation workflows in the SDG Framework. A Flow describes how blocks and pipelines are orchestrated to process and generate data efficiently. By leveraging YAML, users can create highly customizable and modular workflows without writing any code.
-
- #### Key Features of a Flow
-
- 1. **Modular Design**:
-    - Flows are composed of blocks, which can be chained together into pipelines.
-    - Each block performs a specific task, such as generating, filtering, or transforming data.
-
- 2. **Reusability**:
-    - Blocks and configurations defined in a Flow can be reused across different workflows.
-    - YAML makes it easy to tweak or extend workflows without significant changes.
-
- 3. **Ease of Configuration**:
-    - Users can specify block types, configurations, and data processing details in a simple and intuitive manner.
-
- ---
-
- ### Sample Flow
-
- Here is an example of a Flow configuration:
-
- ```yaml
- - block_type: LLMBlock
-   block_config:
-     block_name: gen_questions
-     config_path: configs/skills/freeform_questions.yaml
-     model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
-     output_cols:
-       - question
-     batch_kwargs:
-       num_samples: 30
-   drop_duplicates:
-     - question
- - block_type: FilterByValueBlock
-   block_config:
-     block_name: filter_questions
-     filter_column: score
-     filter_value: 1.0
-     operation: operator.eq
-     convert_dtype: float
-     batch_kwargs:
-       num_procs: 8
-   drop_columns:
-     - evaluation
-     - score
-     - num_samples
- - block_type: LLMBlock
-   block_config:
-     block_name: gen_responses
-     config_path: configs/skills/freeform_responses.yaml
-     model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
-     output_cols:
-       - response
- ```
-
- ### Dataflow and Storage
-
- - **Data Representation**: Dataflow between blocks and pipelines is handled using **Hugging Face Datasets**, which are based on Arrow tables. This provides:
-   - Native parallelization capabilities (e.g., maps, filters).
-   - Support for efficient data transformations.
-
- - **Data Checkpoints**: Intermediate caches of generated data. Checkpoints allow users to:
-   - Resume workflows from the last successful state if interrupted.
-   - Improve reliability for long-running workflows.
-
- ---
-
- ## Examples
-
- For sample use cases and implementation examples, please refer to the [examples](examples) directory. This directory contains various examples demonstrating different workflows and use cases of the SDG Framework.
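
The removed README's sample Flow is plain YAML, so it parses into a list of block definitions with any YAML loader. A minimal sketch with PyYAML, using an abbreviated flow (this is not the package's own loader):

```python
import yaml

# Abbreviated version of the sample Flow above: a Flow file is just a
# YAML list of block definitions.
flow_yaml = """
- block_type: LLMBlock
  block_config:
    block_name: gen_questions
    output_cols:
      - question
- block_type: FilterByValueBlock
  block_config:
    block_name: filter_questions
    filter_column: score
    filter_value: 1.0
    operation: operator.eq
"""

flow = yaml.safe_load(flow_yaml)
for block in flow:
    print(block["block_type"], "->", block["block_config"]["block_name"])
# LLMBlock -> gen_questions
# FilterByValueBlock -> filter_questions
```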
@@ -1,90 +0,0 @@
- sdg_hub/__init__.py,sha256=5Wa6onDndPvG4iwnjq2jK747t3-7XKdQn2WfHfq1sFc,67
- sdg_hub/_version.py,sha256=wrhrM1UZdxROWn7XOHbbPZa5jOBzV8tlSBMw233huBg,513
- sdg_hub/flow.py,sha256=3b97fMei1rWuQWeNfv-xyHKUbcMaf-d_b9Xms9J3BCQ,5425
- sdg_hub/logger_config.py,sha256=7uHEJVRfym1c4n95DOKHelLXqAus8uHsZYmzLsEjqpo,422
- sdg_hub/pipeline.py,sha256=u24ccryfy_nOSvsrWiynNmq1rOmOOkw1L5-TqJvuRSo,2339
- sdg_hub/prompts.py,sha256=dOiC9CsNbMt5Km9PnwyuW0v9zUs3cVXE5jZYwtXZTwc,1957
- sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sdg_hub/registry.py,sha256=Sc_HNxo4n0pgWMiEDd_sLjxaSXAMZFiHJIhQKqjywwk,3772
- sdg_hub/sdg.py,sha256=SXXnDGA3MpYlNpsw4XyImL97l0pXiF5P9jrDkZNlDJc,6492
- sdg_hub/blocks/__init__.py,sha256=OwPWofuBBWG7n0nYAXNtFXdq4rPf7FyvKkPfjUBlqec,130
- sdg_hub/blocks/block.py,sha256=ObJp8JaAhQ3lQK6SOYoqHPc7b2hBZMhOXEmIap_qa1k,1788
- sdg_hub/blocks/filterblock.py,sha256=leH0k3stcRzdCWoy8kI2hFruGJ0VUemeA4QBW1eQcdQ,2650
- sdg_hub/blocks/iterblock.py,sha256=7UZnK_JyQfbMhVNVzZ79TtEtADLuosI0z62LhoP63s4,958
- sdg_hub/blocks/llmblock.py,sha256=Jy5vWvcMpXphtv4JEc9Nyjs7lgcoF-Yp0gYx4d_Iopc,16156
- sdg_hub/blocks/rmblocks.py,sha256=nw0p1LytHO7Dmc8RGfJ5uajDQWM93-oNoYrzhaY2QEY,6222
- sdg_hub/blocks/utilblocks.py,sha256=nAehqcDKiDE5W3REGApytYAXztRm9AW65cAy95Ufb8U,4926
- sdg_hub/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sdg_hub/configs/annotations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sdg_hub/configs/annotations/cot_reflection.yaml,sha256=60EdsTe1y7GoUIAWYSGfMa3EKI3oLZKCvDuKU7wHgQU,1737
- sdg_hub/configs/annotations/detailed_description.yaml,sha256=FsGbQMBxf1MAOi0nhrQ4icxcwYMlRura_ji9Pmeh1AA,192
- sdg_hub/configs/annotations/detailed_description_icl.yaml,sha256=NDdwo5EShnYZjm1Fn80sZTAwfnwpPigixP2hvJ8--cU,679
- sdg_hub/configs/annotations/simple.yaml,sha256=C89QyC4DGJqdsr6mW3iqfUcAOj5rMZZSesmMdFoACuM,199
- sdg_hub/configs/knowledge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sdg_hub/configs/knowledge/atomic_facts.yaml,sha256=9icyigsMooyBR_nEwWgj9eBAnuc3kMZMNnEy6AxFSKU,2430
- sdg_hub/configs/knowledge/auxilary_instructions.yaml,sha256=aCgIjvNacdC2ZHThEvhZKvwORK6KqErVvVYQYQrIDLE,2034
- sdg_hub/configs/knowledge/detailed_summary.yaml,sha256=PBymlZljkzN8kbo5DgmNsSM_Xb76SZifuS5Yl-x4Uy4,365
- sdg_hub/configs/knowledge/evaluate_faithfulness.yaml,sha256=iuvx5vNNm_jzHlmcKF83StaDYezRz2vQn3JUHM-TMdQ,3054
- sdg_hub/configs/knowledge/evaluate_question.yaml,sha256=02mikEAJCUEkREBo7KxPY9H6iTUHQN-4cRkn2XMlVQ8,1915
- sdg_hub/configs/knowledge/evaluate_relevancy.yaml,sha256=ASh8A1HAYO1h1tQRrwGnkUmK1n-WDKLdfW_LbSW1ipQ,3690
- sdg_hub/configs/knowledge/extractive_summary.yaml,sha256=06Z9lDiZUsQEURhpwWUVXA3wYO3bRaC0aNoGCpo3-44,376
- sdg_hub/configs/knowledge/generate_code_questions_responses.yaml,sha256=cIus2JYMYDvxHFVSU9QVa-1IK5KoChb3rCU2b4b9UmI,908
- sdg_hub/configs/knowledge/generate_questions_responses.yaml,sha256=H9nb_5xGP7k6HtC3VboXqpiI5kQ9Xp3vjhXH3YIFesk,2525
- sdg_hub/configs/knowledge/mcq_generation.yaml,sha256=d4VKegnVIexwCn0e2AJs-0DC6XdLyUBGaCsQVwzICUE,3152
- sdg_hub/configs/knowledge/router.yaml,sha256=9m_cX3xl808Vwrcq2PACyX45QFPkrV2nVYIY8x10JBU,119
- sdg_hub/configs/knowledge/simple_generate_qa.yaml,sha256=OsuZP9SxQeUhTsHdhUO10mnjJ1u_6xekW5IQucFpRco,1565
- sdg_hub/configs/knowledge/data_recipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sdg_hub/configs/knowledge/data_recipe/default_recipe.yaml,sha256=mB4uQifuS9F5ewKtxwd93XM5yZTZfSqiXxKhdT8bYT8,232
- sdg_hub/configs/reasoning/dynamic_cot.yaml,sha256=6XY_mFpB_oKFQ7U2CmHTqkJRGVHgOvpNmIDfhksYW6o,2641
- sdg_hub/configs/skills/_A_.yaml,sha256=a5m-pUcV9xUb54gQ5U3vsU1RBXzOmsfX0CjTW7U62zo,5240
- sdg_hub/configs/skills/_B_.yaml,sha256=P751l6NvFRkINWz-bX5jgnd_if2bl3d_NlhGI7g81xw,4654
- sdg_hub/configs/skills/_C_.yaml,sha256=tZyiJ4Q3gG4uuoDXw6g__lX3ySEUaRZW2GhW1ustwaM,11370
- sdg_hub/configs/skills/_D_.yaml,sha256=hNq-QudlXrg9CWLpJdrZ4v3vifGTWhyp2gcfwPdR3_o,6776
- sdg_hub/configs/skills/_E_.yaml,sha256=eesIlH9SO07TVF20gy18MZrcDzLhSmynd_F_lvg0oQg,4335
- sdg_hub/configs/skills/_F_.yaml,sha256=LYEyA7wv7QWQscUNQr0K_lotNoWSfuoAEncx3PCRYIs,6997
- sdg_hub/configs/skills/_G_.yaml,sha256=5dcLC5jXOEeDasBkTunnHYrlddI3HcHYnEAXZcrd0ds,8412
- sdg_hub/configs/skills/_H_.yaml,sha256=El-57IjZ5IvdcmCHyHvX_M2RFFkEos572220be8ecrQ,11335
- sdg_hub/configs/skills/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sdg_hub/configs/skills/analyzer.yaml,sha256=QBtyjaU6HBZqzNOmev_W4_scn_hH7Rfxd2xL_LcPLho,2261
- sdg_hub/configs/skills/annotation.yaml,sha256=k5nJ357kUr0Uvq7Hkt3Ey22UbgSjgSjIomjHFfjaQnY,916
- sdg_hub/configs/skills/contexts.yaml,sha256=xSFB6_VmNkEixmqv0RKo6_9CI1i5SD4zvwyJtAIc0vk,1206
- sdg_hub/configs/skills/critic.yaml,sha256=Dr7anOKa7Xx1oDonXzsCfXwKIl4hUTArx2Sb_rgpLQI,1808
- sdg_hub/configs/skills/evaluate_freeform_pair.yaml,sha256=waszWejwK8hkNw7xl70H4FIIOAt9SL4R4ufZVkTvl-c,3026
- sdg_hub/configs/skills/evaluate_freeform_questions.yaml,sha256=peyoumtMh_OAQJxPN02Yb3M4gP_2B8czVgbRYC4Np94,2116
- sdg_hub/configs/skills/evaluate_grounded_pair.yaml,sha256=PzkuY491f9-jDwFy3Xm_y4A6ebIcpAaJ6FtGiAYLVWg,3181
- sdg_hub/configs/skills/evaluate_grounded_questions.yaml,sha256=9yr97azFhMdOfYp11BFtDSIhhP4wjQMOxYZnKWKlCPU,3115
- sdg_hub/configs/skills/freeform_questions.yaml,sha256=5mkwtJDKuFz0U8W8HTMXvYV8mXZaWyN1IDf2cLHO7gg,1512
- sdg_hub/configs/skills/freeform_responses.yaml,sha256=_BLeR2DDOHpXHn2TYMwD4deGLW2Ae2kgeJuEEu3qJGU,1492
- sdg_hub/configs/skills/grounded_questions.yaml,sha256=t6pKjt5Fp_ThZueB7JBrUKuQLQY_At-Y9O67OtrIXMo,1898
- sdg_hub/configs/skills/grounded_responses.yaml,sha256=kVOeBp3BjKCFKG2qConXIQVVPI1EgcKJgKn6DFAkl1s,1860
- sdg_hub/configs/skills/judge.yaml,sha256=FxnJA_wdmyMyMqGEZDAT8hc2itO845mGDNXgpmV2EUU,3203
- sdg_hub/configs/skills/planner.yaml,sha256=yNF6t0EnmwYt1EV9Y3-vkmPcbOQRtvoLr8MITuiUw_A,2086
- sdg_hub/configs/skills/respond.yaml,sha256=K1Q5X5_Q1k60hNDbHDjMYBzxbyOIEEHTQcXW6qQ4Ve0,108
- sdg_hub/configs/skills/revised_responder.yaml,sha256=rjypOJbhZV9PuOD9YhlYgymxOJV8Zdzzz54x6Fxn2bY,2875
- sdg_hub/configs/skills/router.yaml,sha256=CIfea7uIycwGO4cC5a_cBH_OBFZ0F1grO8TE2VCEpAI,121
- sdg_hub/configs/skills/simple_generate_qa_freeform.yaml,sha256=j8cJtEKSvtA__rE08iU6oz2XnfIgj0HiLVL8-6RhK3c,1431
- sdg_hub/configs/skills/simple_generate_qa_grounded.yaml,sha256=tvX9EN5TArFesOOqpdN3hb-IHe7O82a2twQd-gzyCgw,1500
- sdg_hub/configs/skills/data_recipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sdg_hub/configs/skills/data_recipe/default_recipe.yaml,sha256=z1432g7gqKOan57vr72gk9QTC7p7xNkxGsaJMeO_yDY,296
- sdg_hub/flows/annotation/emotion/detailed_description.yaml,sha256=TmiTDFo3jPbXjQIgmm-QJg66nPqKMxmMYFa1clm3AjY,422
- sdg_hub/flows/annotation/emotion/detailed_description_icl.yaml,sha256=1Vk3iKMn1HJX7AIthS8Z2pd0y6WTQ6qWXT-w8J2MggE,426
- sdg_hub/flows/annotation/emotion/simple.yaml,sha256=eX7I8IngXoOklaDgWuJU2X12QLZ5qPAo5WMcI1qadDo,408
- sdg_hub/flows/generation/knowledge/mmlu_bench.yaml,sha256=Rueuxr_n1zabE_nGqOgUfh5hqVmEONRka9NLiZANSew,346
- sdg_hub/flows/generation/knowledge/simple_knowledge.yaml,sha256=o4uyfs1nDiECcNROdsvHKiM46NYvQufo9dF4XSGpY54,298
- sdg_hub/flows/generation/knowledge/synth_knowledge.yaml,sha256=ZTZvevfwDQSKUwPcv1i5IzIchsRHSEN03eTefedQmU8,2172
- sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml,sha256=aVnHkp0DkeuVgWdZ2eUQf5-uzI8tPYOkrs27yoF8m5g,3393
- sdg_hub/flows/generation/skills/agentic_improve_skill.yaml,sha256=XvdvUsn-mi4TmLwCbQ-5sYQqvaxfSTTNXqSfxzO0RXo,2750
- sdg_hub/flows/generation/skills/simple_freeform_skill.yaml,sha256=iVEomFH1E52JA7KLmTIwkS1PnzxUJVPMgbK2O-m80As,309
- sdg_hub/flows/generation/skills/simple_grounded_skill.yaml,sha256=LTLxqdgbLIKSJonuIRHhcRSpit1EawwNvytWzXWXe2E,309
- sdg_hub/flows/generation/skills/synth_grounded_skills.yaml,sha256=91Dm--agpmbm02hIVnFhEndjppKsQEWXDbckR9GAzKM,2045
- sdg_hub/flows/generation/skills/synth_skills.yaml,sha256=PhUP2iBo4RkeFafSW-qxh4WmX_ZTfGi0UAmwN_XSTqs,1504
- sdg_hub/utils/__init__.py,sha256=UEo-9qPt5iVKBIRvgZhOI0SoIBO6zeBxOuLvUQXaM3g,185
- sdg_hub/utils/chunking.py,sha256=VSPQ8dSFI5LF4sefcI0tzWG0Vc1rM_FSMTO6xg_iFzA,2556
- sdg_hub/utils/datautils.py,sha256=0t_SZ_UXBKl8uL6rVp3SUh8YKRbzKlh2oO5gr2cKyEw,389
- sdg_hub/utils/docprocessor.py,sha256=Z4J2DfLhRxMCeIeMKttwi-FdivmPqI-hjEwq6-Ub35c,12485
- sdg_hub/utils/parse_and_convert.py,sha256=I27FdS-H2mSoZ07SsKZmNYM2F_Cg7GHTBXD7YNgASNw,13443
- sdg_hub-0.1.0a3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- sdg_hub-0.1.0a3.dist-info/METADATA,sha256=vUusH0jLACOcoxvTL-e5dAPfhoTV--zgs_MJ-6IYQfQ,5847
- sdg_hub-0.1.0a3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- sdg_hub-0.1.0a3.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
- sdg_hub-0.1.0a3.dist-info/RECORD,,
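
Each RECORD entry above has the form `path,sha256=<hash>,size`, where the hash is the urlsafe base64 encoding of the file's raw SHA-256 digest with the `=` padding stripped, per the wheel spec. A sketch of recomputing such a hash (the path is illustrative, assuming an unpacked wheel):

```python
import base64
import hashlib

def record_hash(path: str) -> str:
    """Compute the sha256=... value a wheel RECORD stores for a file."""
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    # urlsafe base64 without '=' padding, matching the entries above
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# Hypothetical usage:
# print(record_hash("sdg_hub/_version.py"))  # -> sha256=wrhrM1UZ...
```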