sdg-hub 0.1.4 → 0.2.1 (py3-none-any.whl)

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects only the changes between those published versions.
Files changed (145)
  1. sdg_hub/__init__.py +28 -1
  2. sdg_hub/_version.py +2 -2
  3. sdg_hub/core/__init__.py +22 -0
  4. sdg_hub/core/blocks/__init__.py +58 -0
  5. sdg_hub/core/blocks/base.py +313 -0
  6. sdg_hub/core/blocks/deprecated_blocks/__init__.py +29 -0
  7. sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +93 -0
  8. sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +88 -0
  9. sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +103 -0
  10. sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +94 -0
  11. sdg_hub/core/blocks/deprecated_blocks/llmblock.py +479 -0
  12. sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +88 -0
  13. sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +58 -0
  14. sdg_hub/core/blocks/deprecated_blocks/selector.py +97 -0
  15. sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +88 -0
  16. sdg_hub/core/blocks/evaluation/__init__.py +9 -0
  17. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +564 -0
  18. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +564 -0
  19. sdg_hub/core/blocks/evaluation/verify_question_block.py +564 -0
  20. sdg_hub/core/blocks/filtering/__init__.py +12 -0
  21. sdg_hub/core/blocks/filtering/column_value_filter.py +188 -0
  22. sdg_hub/core/blocks/llm/__init__.py +27 -0
  23. sdg_hub/core/blocks/llm/client_manager.py +398 -0
  24. sdg_hub/core/blocks/llm/config.py +336 -0
  25. sdg_hub/core/blocks/llm/error_handler.py +368 -0
  26. sdg_hub/core/blocks/llm/llm_chat_block.py +542 -0
  27. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +491 -0
  28. sdg_hub/core/blocks/llm/prompt_builder_block.py +368 -0
  29. sdg_hub/core/blocks/llm/text_parser_block.py +357 -0
  30. sdg_hub/core/blocks/registry.py +331 -0
  31. sdg_hub/core/blocks/transform/__init__.py +23 -0
  32. sdg_hub/core/blocks/transform/duplicate_columns.py +88 -0
  33. sdg_hub/core/blocks/transform/index_based_mapper.py +225 -0
  34. sdg_hub/core/blocks/transform/melt_columns.py +126 -0
  35. sdg_hub/core/blocks/transform/rename_columns.py +69 -0
  36. sdg_hub/core/blocks/transform/text_concat.py +102 -0
  37. sdg_hub/core/blocks/transform/uniform_col_val_setter.py +101 -0
  38. sdg_hub/core/flow/__init__.py +20 -0
  39. sdg_hub/core/flow/base.py +1209 -0
  40. sdg_hub/core/flow/checkpointer.py +333 -0
  41. sdg_hub/core/flow/metadata.py +389 -0
  42. sdg_hub/core/flow/migration.py +198 -0
  43. sdg_hub/core/flow/registry.py +393 -0
  44. sdg_hub/core/flow/validation.py +277 -0
  45. sdg_hub/{utils → core/utils}/__init__.py +7 -4
  46. sdg_hub/core/utils/datautils.py +63 -0
  47. sdg_hub/core/utils/error_handling.py +208 -0
  48. sdg_hub/core/utils/flow_id_words.yaml +231 -0
  49. sdg_hub/core/utils/flow_identifier.py +94 -0
  50. sdg_hub/{utils → core/utils}/path_resolution.py +2 -2
  51. sdg_hub/core/utils/yaml_utils.py +59 -0
  52. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml +40 -0
  53. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/detailed_summary.yaml +13 -0
  54. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_faithfulness.yaml +64 -0
  55. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml +29 -0
  56. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml +81 -0
  57. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml +13 -0
  58. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +192 -0
  59. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml +54 -0
  60. sdg_hub-0.2.1.dist-info/METADATA +221 -0
  61. sdg_hub-0.2.1.dist-info/RECORD +68 -0
  62. sdg_hub/blocks/__init__.py +0 -42
  63. sdg_hub/blocks/block.py +0 -96
  64. sdg_hub/blocks/llmblock.py +0 -375
  65. sdg_hub/blocks/openaichatblock.py +0 -556
  66. sdg_hub/blocks/utilblocks.py +0 -597
  67. sdg_hub/checkpointer.py +0 -139
  68. sdg_hub/configs/annotations/cot_reflection.yaml +0 -34
  69. sdg_hub/configs/annotations/detailed_annotations.yaml +0 -28
  70. sdg_hub/configs/annotations/detailed_description.yaml +0 -10
  71. sdg_hub/configs/annotations/detailed_description_icl.yaml +0 -32
  72. sdg_hub/configs/annotations/simple_annotations.yaml +0 -9
  73. sdg_hub/configs/knowledge/__init__.py +0 -0
  74. sdg_hub/configs/knowledge/atomic_facts.yaml +0 -46
  75. sdg_hub/configs/knowledge/auxilary_instructions.yaml +0 -35
  76. sdg_hub/configs/knowledge/detailed_summary.yaml +0 -18
  77. sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +0 -68
  78. sdg_hub/configs/knowledge/evaluate_question.yaml +0 -38
  79. sdg_hub/configs/knowledge/evaluate_relevancy.yaml +0 -84
  80. sdg_hub/configs/knowledge/extractive_summary.yaml +0 -18
  81. sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +0 -39
  82. sdg_hub/configs/knowledge/generate_questions.yaml +0 -82
  83. sdg_hub/configs/knowledge/generate_questions_responses.yaml +0 -56
  84. sdg_hub/configs/knowledge/generate_responses.yaml +0 -86
  85. sdg_hub/configs/knowledge/mcq_generation.yaml +0 -83
  86. sdg_hub/configs/knowledge/router.yaml +0 -12
  87. sdg_hub/configs/knowledge/simple_generate_qa.yaml +0 -34
  88. sdg_hub/configs/reasoning/__init__.py +0 -0
  89. sdg_hub/configs/reasoning/dynamic_cot.yaml +0 -40
  90. sdg_hub/configs/skills/__init__.py +0 -0
  91. sdg_hub/configs/skills/analyzer.yaml +0 -48
  92. sdg_hub/configs/skills/annotation.yaml +0 -36
  93. sdg_hub/configs/skills/contexts.yaml +0 -28
  94. sdg_hub/configs/skills/critic.yaml +0 -60
  95. sdg_hub/configs/skills/evaluate_freeform_pair.yaml +0 -111
  96. sdg_hub/configs/skills/evaluate_freeform_questions.yaml +0 -78
  97. sdg_hub/configs/skills/evaluate_grounded_pair.yaml +0 -119
  98. sdg_hub/configs/skills/evaluate_grounded_questions.yaml +0 -51
  99. sdg_hub/configs/skills/freeform_questions.yaml +0 -34
  100. sdg_hub/configs/skills/freeform_responses.yaml +0 -39
  101. sdg_hub/configs/skills/grounded_questions.yaml +0 -38
  102. sdg_hub/configs/skills/grounded_responses.yaml +0 -59
  103. sdg_hub/configs/skills/icl_examples/STEM.yaml +0 -56
  104. sdg_hub/configs/skills/icl_examples/__init__.py +0 -0
  105. sdg_hub/configs/skills/icl_examples/coding.yaml +0 -97
  106. sdg_hub/configs/skills/icl_examples/extraction.yaml +0 -36
  107. sdg_hub/configs/skills/icl_examples/humanities.yaml +0 -71
  108. sdg_hub/configs/skills/icl_examples/math.yaml +0 -85
  109. sdg_hub/configs/skills/icl_examples/reasoning.yaml +0 -30
  110. sdg_hub/configs/skills/icl_examples/roleplay.yaml +0 -45
  111. sdg_hub/configs/skills/icl_examples/writing.yaml +0 -80
  112. sdg_hub/configs/skills/judge.yaml +0 -53
  113. sdg_hub/configs/skills/planner.yaml +0 -67
  114. sdg_hub/configs/skills/respond.yaml +0 -8
  115. sdg_hub/configs/skills/revised_responder.yaml +0 -78
  116. sdg_hub/configs/skills/router.yaml +0 -59
  117. sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +0 -27
  118. sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +0 -31
  119. sdg_hub/flow.py +0 -477
  120. sdg_hub/flow_runner.py +0 -450
  121. sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +0 -13
  122. sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +0 -12
  123. sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +0 -89
  124. sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +0 -136
  125. sdg_hub/flows/generation/skills/improve_responses.yaml +0 -103
  126. sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +0 -12
  127. sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +0 -12
  128. sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +0 -80
  129. sdg_hub/flows/generation/skills/synth_skills.yaml +0 -59
  130. sdg_hub/pipeline.py +0 -121
  131. sdg_hub/prompts.py +0 -80
  132. sdg_hub/registry.py +0 -122
  133. sdg_hub/sdg.py +0 -206
  134. sdg_hub/utils/config_validation.py +0 -91
  135. sdg_hub/utils/datautils.py +0 -14
  136. sdg_hub/utils/error_handling.py +0 -94
  137. sdg_hub/utils/validation_result.py +0 -10
  138. sdg_hub-0.1.4.dist-info/METADATA +0 -190
  139. sdg_hub-0.1.4.dist-info/RECORD +0 -89
  140. sdg_hub/{logger_config.py → core/utils/logger_config.py} +1 -1
  141. /sdg_hub/{configs/__init__.py → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md} +0 -0
  142. /sdg_hub/{configs/annotations → flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab}/__init__.py +0 -0
  143. {sdg_hub-0.1.4.dist-info → sdg_hub-0.2.1.dist-info}/WHEEL +0 -0
  144. {sdg_hub-0.1.4.dist-info → sdg_hub-0.2.1.dist-info}/licenses/LICENSE +0 -0
  145. {sdg_hub-0.1.4.dist-info → sdg_hub-0.2.1.dist-info}/top_level.txt +0 -0
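Judging purely from the paths above, 0.2.1 reorganizes the package: the flat 0.1.4 modules (sdg_hub/blocks, sdg_hub/flow.py, sdg_hub/pipeline.py, sdg_hub/registry.py) are deleted and their successors live under sdg_hub/core. A hypothetical sketch of how imports shift under the new layout, inferred only from this file listing (the re-exports added to sdg_hub/__init__.py are not shown in this diff):

    # Hypothetical mapping inferred from the file listing above; the concrete
    # 0.2.1 public API is not visible in this diff.
    try:
        # 0.2.1 layout: subpackages under sdg_hub.core
        from sdg_hub.core import blocks, flow            # sdg_hub/core/{blocks,flow}/__init__.py
        from sdg_hub.core.utils import path_resolution   # moved from sdg_hub/utils/
    except ImportError:
        # 0.1.4 layout: flat top-level modules (removed in this release)
        from sdg_hub import blocks, flow
        from sdg_hub.utils import path_resolution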
sdg_hub/flows/generation/skills/improve_responses.yaml DELETED
@@ -1,103 +0,0 @@
- - block_type: LLMBlock
- block_config:
- block_name: router
- config_path: configs/skills/router.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - route
- gen_kwargs:
- temperature: 0
- max_tokens: 5
- extra_body:
- guided_choice:
- - "coding"
- - "extraction"
- - "humanities"
- - "math"
- - "reasoning"
- - "roleplay"
- - "STEM"
- - "writing"
- - block_type: SamplePopulatorBlock
- block_config:
- block_name: icl_populator
- config_paths:
- - configs/skills/icl_examples/coding.yaml
- - configs/skills/icl_examples/extraction.yaml
- - configs/skills/icl_examples/humanities.yaml
- - configs/skills/icl_examples/math.yaml
- - configs/skills/icl_examples/reasoning.yaml
- - configs/skills/icl_examples/roleplay.yaml
- - configs/skills/icl_examples/STEM.yaml
- - configs/skills/icl_examples/writing.yaml
- column_name: route
- batch_kwargs:
- num_procs: 8
- - block_type: LLMBlock
- block_config:
- block_name: analyzer
- config_path: configs/skills/analyzer.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - analysis
- - rubric
- - block_type: LLMBlock
- block_config:
- block_name: critic
- config_path: configs/skills/critic.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - critique
- - block_type: LLMBlock
- block_config:
- block_name: planner
- config_path: configs/skills/planner.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - plan
- - block_type: LLMBlock
- block_config:
- block_name: revised_responder
- config_path: configs/skills/revised_responder.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - revised_response
- drop_columns:
- - icl_query
- - icl_response
- - icl_analysis
- - icl_rubric
- - icl_critique
- - icl_plan
- - icl_revised_response
- - block_type: LLMBlock
- block_config:
- block_name: judge
- config_path: configs/skills/judge.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - judgement
- - verdict
- - block_type: FilterByValueBlock
- block_config:
- block_name: filter_judgement
- filter_column: verdict
- filter_value:
- - Assistant A
- - Assistant B
- operation: operator.contains
- batch_kwargs:
- num_procs: 8
- - block_type: SelectorBlock
- block_config:
- block_name: response_selector
- choice_map:
- Assistant A: "response"
- Assistant B: "revised_response"
- choice_col: verdict
- output_col: chosen_response
- batch_kwargs:
- num_procs: 8
- drop_columns:
- - judgemnent
- - verdict
sdg_hub/flows/generation/skills/simple_freeform_skill.yaml DELETED
@@ -1,12 +0,0 @@
- - block_type: LLMBlock
- block_config:
- block_name: gen_skill_freeform
- config_path: configs/skills/simple_generate_qa_freeform.yaml
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
- output_cols:
- - output
- gen_kwargs:
- temperature: 0.7
- max_tokens: 2048
- drop_duplicates:
- - output
sdg_hub/flows/generation/skills/simple_grounded_skill.yaml DELETED
@@ -1,12 +0,0 @@
- - block_type: LLMBlock
- block_config:
- block_name: gen_skill_grounded
- config_path: configs/skills/simple_generate_qa_grounded.yaml
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
- output_cols:
- - output
- gen_kwargs:
- temperature: 0.7
- max_tokens: 2048
- drop_duplicates:
- - output
sdg_hub/flows/generation/skills/synth_grounded_skills.yaml DELETED
@@ -1,80 +0,0 @@
- - block_type: LLMBlock
- block_config:
- block_name: gen_contexts
- config_path: configs/skills/contexts.yaml
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
- output_cols:
- - context
- gen_kwargs:
- temperature: 0.7
- max_tokens: 2048
- n: 10
- seed: 42
- drop_duplicates:
- - context
- - block_type: LLMBlock
- block_config:
- block_name: gen_grounded_questions
- config_path: configs/skills/grounded_questions.yaml
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
- output_cols:
- - question
- batch_kwargs:
- num_samples: 3
- drop_duplicates:
- - question
- - block_type: LLMBlock
- block_config:
- block_name: eval_grounded_questions
- config_path: configs/skills/evaluate_grounded_questions.yaml
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
- output_cols:
- - evaluation
- - score
- - block_type: FilterByValueBlock
- block_config:
- block_name: filter_grounded_questions
- filter_column: score
- filter_value: 1.0
- operation: operator.eq
- convert_dtype: float
- batch_kwargs:
- num_procs: 8
- drop_columns:
- - evaluation
- - score
- - num_samples
- - block_type: LLMBlock
- block_config:
- block_name: gen_grounded_responses
- config_path: configs/skills/grounded_responses.yaml
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
- output_cols:
- - response
- - block_type: LLMBlock
- block_config:
- block_name: evaluate_grounded_qa_pair
- config_path: configs/skills/evaluate_grounded_pair.yaml
- model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
- output_cols:
- - evaluation
- - score
- - block_type: FilterByValueBlock
- block_config:
- block_name: filter_grounded_qa_pair
- filter_column: score
- filter_value: 2.0
- operation: operator.ge
- convert_dtype: float
- batch_kwargs:
- num_procs: 8
- - block_type: CombineColumnsBlock
- block_config:
- block_name: combine_question_and_context
- columns:
- - context
- - question
- output_col: question
- batch_kwargs:
- num_procs: 8
- batched: True
sdg_hub/flows/generation/skills/synth_skills.yaml DELETED
@@ -1,59 +0,0 @@
- - block_type: LLMBlock
- block_config:
- block_name: gen_questions
- config_path: configs/skills/freeform_questions.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - question
- batch_kwargs:
- num_samples: 30
- drop_duplicates:
- - question
- - block_type: LLMBlock
- block_config:
- block_name: eval_questions
- config_path: configs/skills/evaluate_freeform_questions.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - evaluation
- - score
- - block_type: FilterByValueBlock
- block_config:
- block_name: filter_questions
- filter_column: score
- filter_value: 1.0
- operation: operator.eq
- convert_dtype: float
- batch_kwargs:
- num_procs: 8
- drop_columns:
- - evaluation
- - score
- - num_samples
- - block_type: LLMBlock
- block_config:
- block_name: gen_responses
- config_path: configs/skills/freeform_responses.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - response
- - block_type: LLMBlock
- block_config:
- block_name: evaluate_qa_pair
- config_path: configs/skills/evaluate_freeform_pair.yaml
- model_id: meta-llama/Llama-3.3-70B-Instruct
- output_cols:
- - evaluation
- - score
- - block_type: FilterByValueBlock
- block_config:
- block_name: filter_qa_pair
- filter_column: score
- filter_value: 2.0
- operation: operator.ge
- convert_dtype: float
- batch_kwargs:
- num_procs: 8
- drop_columns:
- - evaluation
- - score
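The FilterByValueBlock entries in the deleted flows above express their predicate as a dotted path into Python's standard operator module (operator.eq, operator.ge, operator.contains) together with an optional convert_dtype. A small illustration of what those stdlib functions evaluate to for the score columns used here and the verdict filter in improve_responses above; this shows only the named operators, not the block's internal implementation, and the float cast is an inference from convert_dtype: float:

    import operator

    score = float("2.0")                      # convert_dtype: float (inferred cast)
    operator.eq(score, 1.0)                   # operation: operator.eq, filter_value: 1.0 -> False
    operator.ge(score, 2.0)                   # operation: operator.ge, filter_value: 2.0 -> True

    # With a list filter_value, operator.contains(container, item) checks membership.
    operator.contains(["Assistant A", "Assistant B"], "Assistant A")  # -> True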
sdg_hub/pipeline.py DELETED
@@ -1,121 +0,0 @@
- """
- Deprecated Pipeline class for data generation pipelines.
-
- Use the Flow class directly for new code.
- """
-
- # SPDX-License-Identifier: Apache-2.0
- # Standard
- import warnings
- from typing import List, Dict, Any
-
- # Third Party
- from datasets import Dataset
- from datasets.data_files import EmptyDatasetError
-
- # Local
- from .logger_config import setup_logger
-
- logger = setup_logger(__name__)
-
-
- class Pipeline:
-     """A class representing a data generation pipeline.
-
-     This class is deprecated and will be removed in a future version.
-     Use the Flow class directly instead.
-
-     Parameters
-     ----------
-     chained_blocks : List[Dict[str, Any]]
-         List of block configurations to execute in sequence.
-
-     Attributes
-     ----------
-     chained_blocks : List[Dict[str, Any]]
-         List of block configurations to execute in sequence.
-     """
-
-     def __init__(self, chained_blocks: List[Dict[str, Any]]) -> None:
-         """
-         Initialize the Pipeline class with a configuration dictionary.
-
-         DEPRECATED: This class is deprecated and will be removed in a future version.
-         Use the Flow class directly instead.
-         """
-         warnings.warn(
-             "Pipeline class is deprecated and will be removed in a future version. "
-             "Use Flow class directly instead of wrapping it with Pipeline.",
-             DeprecationWarning,
-             stacklevel=2
-         )
-         # pipeline config is the run configuration that consists of the pipeline steps
-         self.chained_blocks = chained_blocks
-
-     def _drop_duplicates(self, dataset: Dataset, cols: List[str]) -> Dataset:
-         """Drop duplicates from the dataset based on the columns provided.
-
-         Parameters
-         ----------
-         dataset : Dataset
-             The input dataset.
-         cols : List[str]
-             Columns to consider for duplicate detection.
-
-         Returns
-         -------
-         Dataset
-             Dataset with duplicates removed.
-         """
-         df = dataset.to_pandas()
-         df = df.drop_duplicates(subset=cols).reset_index(drop=True)
-         return Dataset.from_pandas(df)
-
-     def generate(self, dataset: Dataset) -> Dataset:
-         """Generate the dataset by running the pipeline steps.
-
-         Parameters
-         ----------
-         dataset : Dataset
-             The input dataset to process.
-
-         Returns
-         -------
-         Dataset
-             The processed dataset.
-
-         Raises
-         ------
-         EmptyDatasetError
-             If a block produces an empty dataset.
-         """
-         for block_prop in self.chained_blocks:
-             block_type = block_prop["block_type"]
-             block_config = block_prop["block_config"]
-             drop_columns = block_prop.get("drop_columns", [])
-             gen_kwargs = block_prop.get("gen_kwargs", {})
-             drop_duplicates_cols = block_prop.get("drop_duplicates", False)
-             block = block_type(**block_config)
-
-             logger.debug("------------------------------------\n")
-             logger.debug("Running block: %s", block_config["block_name"])
-             logger.debug("Input dataset: %s", dataset)
-
-             dataset = block.generate(dataset, **gen_kwargs)
-
-             if len(dataset) == 0:
-                 raise EmptyDatasetError(
-                     f"Pipeline stopped: Empty dataset after running block: {block_config['block_name']}"
-                 )
-
-             drop_columns_in_ds = [e for e in drop_columns if e in dataset.column_names]
-             if drop_columns:
-                 dataset = dataset.remove_columns(drop_columns_in_ds)
-
-             if drop_duplicates_cols:
-                 dataset = self._drop_duplicates(dataset, cols=drop_duplicates_cols)
-
-             logger.debug("Output dataset: %s", dataset)
-             logger.debug("------------------------------------\n\n")
-
-         return dataset
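For reference, the generate loop above expects each entry of chained_blocks to provide block_type (a class instantiated as block_type(**block_config)) plus optional gen_kwargs, drop_columns, and drop_duplicates keys. A minimal sketch of driving the deprecated class with a stand-in block; EchoBlock is hypothetical and exists only to mirror the generate(dataset, **gen_kwargs) interface the loop calls:

    from datasets import Dataset
    from sdg_hub.pipeline import Pipeline  # 0.1.4 import path; removed in 0.2.1

    class EchoBlock:
        """Hypothetical stand-in; real blocks lived in sdg_hub.blocks."""
        def __init__(self, block_name):
            self.block_name = block_name

        def generate(self, dataset, **gen_kwargs):
            # A real block would add generated columns here.
            return dataset.map(lambda row: {"output": row["prompt"].upper()})

    pipeline = Pipeline(  # emits the DeprecationWarning shown above
        chained_blocks=[
            {
                "block_type": EchoBlock,                 # instantiated as block_type(**block_config)
                "block_config": {"block_name": "echo"},
                "gen_kwargs": {},                        # forwarded to block.generate
                "drop_duplicates": ["output"],           # de-duplicate on this column
            }
        ]
    )
    result = pipeline.generate(Dataset.from_list([{"prompt": "hi"}, {"prompt": "hi"}]))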
sdg_hub/prompts.py DELETED
@@ -1,80 +0,0 @@
- # Local
- from .registry import PromptRegistry
-
-
- @PromptRegistry.register("blank")
- def blank_chat_template():
- return """{{ messages }}"""
-
-
- @PromptRegistry.register("instructlab")
- def instructlab_chat_template():
- return """{% for message in messages %}{% if message['role'] == 'pretraining' %}{{ '<|pretrain|>' + message['content'] + '<|endoftext|>' + '<|/pretrain|>' }}{% elif message['role'] == 'system' %}{{ '<|system|>' + '\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>' + '\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{{ '<|assistant|>' + '\n' + message['content'] + '<|endoftext|>' + ('' if loop.last else '\n') }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>' + '\n' }}{% endif %}{% endfor %}"""
-
-
- @PromptRegistry.register("mistralai/Mixtral")
- def mistral_chat_template():
- return """{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n<s>\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + '</s>'}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n"""
-
-
- @PromptRegistry.register("meta-llama/Llama-3.3")
- def meta_llama_chat_template():
- return """{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n"""
-
-
- @PromptRegistry.register("microsoft/phi-4")
- def microsoft_phi_chat_template():
- return """{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}"""
-
-
- @PromptRegistry.register("nvidia/Llama-3_3-Nemotron-Super-49B-v1")
- def nemotron_chat_template():
- """
- Format chat messages for the Nemotron model, including a system prompt and structured message headers.
-
- The template starts with a system message containing "detailed thinking on", then iterates over messages, wrapping each with start and end header tokens and an end-of-text token. For assistant messages containing a `</think>` tag, only the content after this tag is included. Optionally appends an assistant prompt if generation is requested.
- """
- return """{{- bos_token }}
- {{- "<|start_header_id|>system<|end_header_id|>\n\n" }}detailed thinking on{{- "<|eot_id|>" }}
- {%- for message in messages %}
- {%- if message['role'] == 'assistant' and '</think>' in message['content'] %}
- {%- set content = message['content'].split('</think>')[-1].lstrip() %}
- {%- else %}
- {%- set content = message['content'] %}
- {%- endif %}
- {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + content | trim + '<|eot_id|>' }}
- {%- endfor %}
- {%- if add_generation_prompt %}
- {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
- {%- endif %}"""
-
-
- @PromptRegistry.register("Qwen/Qwen2.5")
- def qwen_2_5_chat_template():
- """
- Formats chat messages into the prompt structure required by the Qwen 2.5 model family, supporting system messages, tool descriptions, function call instructions, and role-based message formatting.
-
- If tools are provided, includes tool signatures and instructions for function calls in the system prompt. User, assistant, and tool messages are wrapped with special tokens, and assistant tool calls are serialized as JSON within XML tags. Optionally appends a generation prompt for the assistant.
- """
- return """{%- if tools %}\n {{- \'<|im_start|>system\\n\' }}\n {%- if messages[0][\'role\'] == \'system\' %}\n {{- messages[0][\'content\'] }}\n {%- else %}\n {{- \'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\' }}\n {%- endif %}\n {{- "\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>" }}\n {%- for tool in tools %}\n {{- "\\n" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- "\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\\n</tool_call><|im_end|>\\n" }}\n{%- else %}\n {%- if messages[0][\'role\'] == \'system\' %}\n {{- \'<|im_start|>system\\n\' + messages[0][\'content\'] + \'<|im_end|>\\n\' }}\n {%- else %}\n {{- \'<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n\' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' + message.content + \'<|im_end|>\' + \'\\n\' }}\n {%- elif message.role == "assistant" %}\n {{- \'<|im_start|>\' + message.role }}\n {%- if message.content %}\n {{- \'\\n\' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- \'\\n<tool_call>\\n{"name": "\' }}\n {{- tool_call.name }}\n {{- \'", "arguments": \' }}\n {{- tool_call.arguments | tojson }}\n {{- \'}\\n</tool_call>\' }}\n {%- endfor %}\n {{- \'<|im_end|>\\n\' }}\n {%- elif message.role == "tool" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}\n {{- \'<|im_start|>user\' }}\n {%- endif %}\n {{- \'\\n<tool_response>\\n\' }}\n {{- message.content }}\n {{- \'\\n</tool_response>\' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}\n {{- \'<|im_end|>\\n\' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- \'<|im_start|>assistant\\n\' }}\n{%- endif %}\n"""
-
-
- @PromptRegistry.register("Qwen/Qwen3")
- def qwen_3_chat_template():
- """
- Formats chat messages for the Qwen 3 model family, supporting multi-step tool usage, reasoning content, and special XML tags for tool calls and responses.
-
- This template handles system messages, user and assistant roles, and tool interactions. When tools are provided, it outputs their signatures and instructions for function calls. It tracks the last user query to determine where to insert assistant reasoning content within `<think>` tags. Assistant tool calls are serialized as JSON within `<tool_call>` tags, and tool responses are grouped inside `<tool_response>` tags. Optionally, a generation prompt and empty reasoning block can be added.
-
- Parameters:
- tools (optional): List of tool signature objects to be included in the prompt.
- messages: List of message objects, each with a role and content, and optionally tool_calls or reasoning_content.
- add_generation_prompt (optional): If true, appends an assistant prompt for generation.
- enable_thinking (optional): If false, inserts an empty reasoning block in the assistant prompt.
- """
- return """{%- if tools %}\n {{- \'<|im_start|>system\\n\' }}\n {%- if messages[0].role == \'system\' %}\n {{- messages[0].content + \'\\n\\n\' }}\n {%- endif %}\n {{- "# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>" }}\n {%- for tool in tools %}\n {{- "\\n" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- "\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\\n</tool_call><|im_end|>\\n" }}\n{%- else %}\n {%- if messages[0].role == \'system\' %}\n {{- \'<|im_start|>system\\n\' + messages[0].content + \'<|im_end|>\\n\' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith(\'<tool_response>\') and message.content.endswith(\'</tool_response>\')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = \'\' %}\n {%- endif %}\n {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' + content + \'<|im_end|>\' + \'\\n\' }}\n {%- elif message.role == "assistant" %}\n {%- set reasoning_content = \'\' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if \'</think>\' in content %}\n {%- set reasoning_content = content.split(\'</think>\')[0].rstrip(\'\\n\').split(\'<think>\')[-1].lstrip(\'\\n\') %}\n {%- set content = content.split(\'</think>\')[-1].lstrip(\'\\n\') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- \'<|im_start|>\' + message.role + \'\\n<think>\\n\' + reasoning_content.strip(\'\\n\') + \'\\n</think>\\n\\n\' + content.lstrip(\'\\n\') }}\n {%- else %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' + content }}\n {%- endif %}\n {%- else %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- \'\\n\' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- \'<tool_call>\\n{"name": "\' }}\n {{- tool_call.name }}\n {{- \'", "arguments": \' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- \'}\\n</tool_call>\' }}\n {%- endfor %}\n {%- endif %}\n {{- \'<|im_end|>\\n\' }}\n {%- elif message.role == "tool" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}\n {{- \'<|im_start|>user\' }}\n {%- endif %}\n {{- \'\\n<tool_response>\\n\' }}\n {{- content }}\n {{- \'\\n</tool_response>\' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}\n {{- \'<|im_end|>\\n\' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- 
\'<|im_start|>assistant\\n\' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- \'<think>\\n\\n</think>\\n\\n\' }}\n {%- endif %}\n{%- endif %}"""
-
-
- @PromptRegistry.register("mistralai/Mistral-Small-3")
- def mistral_small_3_chat_template():
- return """{%- if not date_string is defined %}\n {%- set date_string = \"2025-01-01\" %}\n{%- endif %}\n{%- set default_system_message = \"You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\\nYour knowledge base was last updated on 2023-10-01. The current date is \" + date_string + \".\\n\\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \\\"What are some good restaurants around me?\\\" => \\\"Where are you?\\\" or \\\"When is the next flight to Tokyo\\\" => \\\"Where do you travel from?\\\")\" %}\n\n<s>\n\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = default_system_message %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}\n\n{%- for message in loop_messages %}\n {%- if message['role'] == 'user' %}\n {{- '[INST]' + message['content'] + '[/INST]' }}\n {%- elif message['role'] == 'system' %}\n {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}\n {%- elif message['role'] == 'assistant' %}\n {{- message['content'] + '</s>' }}\n {%- else %}\n {{- raise_exception('Only user, system and assistant roles are supported!') }}\n {%- endif %}\n{%- endfor %}"""
sdg_hub/registry.py DELETED
@@ -1,122 +0,0 @@
- # Standard
- from typing import Union, List, Dict
-
- # Third Party
- from jinja2 import Template
-
- # Local
- from .logger_config import setup_logger
-
- logger = setup_logger(__name__)
-
-
- class BlockRegistry:
-     """Registry for block classes to avoid manual additions to block type map."""
-
-     _registry: Dict[str, type] = {}
-
-     @classmethod
-     def register(cls, block_name: str):
-         """
-         Decorator to register a block class under a specified name.
-
-         :param block_name: Name under which to register the block.
-         """
-
-         def decorator(block_class):
-             cls._registry[block_name] = block_class
-             logger.debug(
-                 f"Registered block '{block_name}' with class '{block_class.__name__}'"
-             )
-             return block_class
-
-         return decorator
-
-     @classmethod
-     def get_registry(cls):
-         """
-         Retrieve the current registry map of block types.
-
-         :return: Dictionary of registered block names and classes.
-         """
-         logger.debug("Fetching the block registry map.")
-         return cls._registry
-
-
- class PromptRegistry:
-     """Registry for managing Jinja2 prompt templates."""
-
-     _registry: Dict[str, Template] = {}
-
-     @classmethod
-     def register(cls, name: str):
-         """Decorator to register a Jinja2 template function by name.
-
-         :param name: Name of the template to register.
-         :return: A decorator that registers the Jinja2 template function.
-         """
-
-         def decorator(func):
-             template_str = func()
-             cls._registry[name] = Template(template_str)
-             logger.debug(f"Registered prompt template '{name}'")
-             return func
-
-         return decorator
-
-     @classmethod
-     def get_template(cls, name: str) -> Template:
-         """Retrieve a Jinja2 template by name.
-
-         :param name: Name of the template to retrieve.
-         :return: The Jinja2 template instance.
-         """
-         if name not in cls._registry:
-             raise KeyError(f"Template '{name}' not found.")
-         logger.debug(f"Retrieving prompt template '{name}'")
-         return cls._registry[name]
-
-     @classmethod
-     def get_registry(cls):
-         """
-         Retrieve the current registry map of block types.
-
-         :return: Dictionary of registered block names and classes.
-         """
-         logger.debug("Fetching the block registry map.")
-         return cls._registry
-
-     @classmethod
-     def render_template(
-         cls,
-         name: str,
-         messages: Union[str, List[Dict[str, str]]],
-         add_generation_prompt: bool = True,
-     ) -> str:
-         """Render the template with the provided messages or query.
-
-         :param name: Name of the template to render.
-         :param messages: Either a single query string or a list of messages (each as a dict with 'role' and 'content').
-         :param add_generation_prompt: Whether to add a generation prompt at the end.
-         :return: The rendered prompt as a string.
-         """
-
-         # Special handling for "blank" template
-         if name == "blank":
-             if not isinstance(messages, str):
-                 raise ValueError(
-                     "The 'blank' template can only be used with a single query string, not a list of messages."
-                 )
-             return messages # Return the query as-is without templating
-
-         # Get the template
-         template = cls.get_template(name)
-
-         # If `messages` is a string, wrap it in a list with a default user role
-         if isinstance(messages, str):
-             messages = [{"role": "user", "content": messages}]
-
-         # Render the template with the `messages` list
-         return template.render(
-             messages=messages, add_generation_prompt=add_generation_prompt
-         )
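Together with the deleted prompts.py above, this registry resolved chat templates by name and rendered them over a message list. A short sketch of the 0.1.4-era usage, based only on the code shown in these hunks:

    from sdg_hub.registry import PromptRegistry  # 0.1.4 import path; removed in 0.2.1
    import sdg_hub.prompts  # noqa: F401  (the decorators above register the templates at import time)

    # Render a registered chat template over a message list.
    prompt = PromptRegistry.render_template(
        "instructlab",
        messages=[{"role": "user", "content": "Summarize the document."}],
        add_generation_prompt=True,
    )

    # The "blank" template bypasses Jinja and only accepts a plain query string.
    raw = PromptRegistry.render_template("blank", "Summarize the document.")

    # get_template returns the underlying jinja2.Template for a registered name.
    mixtral_template = PromptRegistry.get_template("mistralai/Mixtral")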