sdg-hub 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/__init__.py +4 -0
- sdg_hub/_version.py +21 -0
- sdg_hub/blocks/__init__.py +6 -0
- sdg_hub/blocks/block.py +54 -0
- sdg_hub/blocks/filterblock.py +76 -0
- sdg_hub/blocks/iterblock.py +31 -0
- sdg_hub/blocks/llmblock.py +430 -0
- sdg_hub/blocks/rmblocks.py +194 -0
- sdg_hub/blocks/utilblocks.py +140 -0
- sdg_hub/configs/__init__.py +0 -0
- sdg_hub/configs/annotations/__init__.py +0 -0
- sdg_hub/configs/annotations/cot_reflection.yaml +34 -0
- sdg_hub/configs/annotations/detailed_description.yaml +10 -0
- sdg_hub/configs/annotations/detailed_description_icl.yaml +32 -0
- sdg_hub/configs/annotations/simple.yaml +10 -0
- sdg_hub/configs/knowledge/__init__.py +0 -0
- sdg_hub/configs/knowledge/atomic_facts.yaml +45 -0
- sdg_hub/configs/knowledge/auxilary_instructions.yaml +35 -0
- sdg_hub/configs/knowledge/data_recipe/__init__.py +0 -0
- sdg_hub/configs/knowledge/data_recipe/default_recipe.yaml +3 -0
- sdg_hub/configs/knowledge/detailed_summary.yaml +17 -0
- sdg_hub/configs/knowledge/evaluate_faithfulness.yaml +68 -0
- sdg_hub/configs/knowledge/evaluate_question.yaml +38 -0
- sdg_hub/configs/knowledge/evaluate_relevancy.yaml +85 -0
- sdg_hub/configs/knowledge/extractive_summary.yaml +17 -0
- sdg_hub/configs/knowledge/generate_code_questions_responses.yaml +39 -0
- sdg_hub/configs/knowledge/generate_questions_responses.yaml +56 -0
- sdg_hub/configs/knowledge/mcq_generation.yaml +83 -0
- sdg_hub/configs/knowledge/router.yaml +12 -0
- sdg_hub/configs/knowledge/simple_generate_qa.yaml +34 -0
- sdg_hub/configs/reasoning/dynamic_cot.yaml +40 -0
- sdg_hub/configs/skills/_A_.yaml +97 -0
- sdg_hub/configs/skills/_B_.yaml +36 -0
- sdg_hub/configs/skills/_C_.yaml +71 -0
- sdg_hub/configs/skills/_D_.yaml +85 -0
- sdg_hub/configs/skills/_E_.yaml +30 -0
- sdg_hub/configs/skills/_F_.yaml +45 -0
- sdg_hub/configs/skills/_G_.yaml +56 -0
- sdg_hub/configs/skills/_H_.yaml +80 -0
- sdg_hub/configs/skills/__init__.py +0 -0
- sdg_hub/configs/skills/analyzer.yaml +48 -0
- sdg_hub/configs/skills/annotation.yaml +36 -0
- sdg_hub/configs/skills/contexts.yaml +21 -0
- sdg_hub/configs/skills/critic.yaml +60 -0
- sdg_hub/configs/skills/data_recipe/__init__.py +0 -0
- sdg_hub/configs/skills/data_recipe/default_recipe.yaml +6 -0
- sdg_hub/configs/skills/evaluate_freeform_pair.yaml +44 -0
- sdg_hub/configs/skills/evaluate_freeform_questions.yaml +46 -0
- sdg_hub/configs/skills/evaluate_grounded_pair.yaml +54 -0
- sdg_hub/configs/skills/evaluate_grounded_questions.yaml +51 -0
- sdg_hub/configs/skills/freeform_questions.yaml +29 -0
- sdg_hub/configs/skills/freeform_responses.yaml +45 -0
- sdg_hub/configs/skills/grounded_questions.yaml +38 -0
- sdg_hub/configs/skills/grounded_responses.yaml +59 -0
- sdg_hub/configs/skills/judge.yaml +53 -0
- sdg_hub/configs/skills/planner.yaml +67 -0
- sdg_hub/configs/skills/respond.yaml +8 -0
- sdg_hub/configs/skills/revised_responder.yaml +78 -0
- sdg_hub/configs/skills/router.yaml +12 -0
- sdg_hub/configs/skills/simple_generate_qa_freeform.yaml +27 -0
- sdg_hub/configs/skills/simple_generate_qa_grounded.yaml +31 -0
- sdg_hub/flow.py +127 -0
- sdg_hub/flows/annotation/emotion/detailed_description.yaml +19 -0
- sdg_hub/flows/annotation/emotion/detailed_description_icl.yaml +19 -0
- sdg_hub/flows/annotation/emotion/simple.yaml +19 -0
- sdg_hub/flows/generation/knowledge/mmlu_bench.yaml +13 -0
- sdg_hub/flows/generation/knowledge/simple_knowledge.yaml +12 -0
- sdg_hub/flows/generation/knowledge/synth_knowledge.yaml +89 -0
- sdg_hub/flows/generation/knowledge/synth_knowledge1.5.yaml +136 -0
- sdg_hub/flows/generation/skills/agentic_improve_skill.yaml +108 -0
- sdg_hub/flows/generation/skills/simple_freeform_skill.yaml +12 -0
- sdg_hub/flows/generation/skills/simple_grounded_skill.yaml +12 -0
- sdg_hub/flows/generation/skills/synth_grounded_skills.yaml +80 -0
- sdg_hub/flows/generation/skills/synth_skills.yaml +59 -0
- sdg_hub/logger_config.py +20 -0
- sdg_hub/pipeline.py +66 -0
- sdg_hub/prompts.py +17 -0
- sdg_hub/py.typed +0 -0
- sdg_hub/registry.py +122 -0
- sdg_hub/sdg.py +164 -0
- sdg_hub/utils/__init__.py +5 -0
- sdg_hub/utils/chunking.py +73 -0
- sdg_hub/utils/datamixing.py +123 -0
- sdg_hub/utils/datautils.py +14 -0
- sdg_hub/utils/docprocessor.py +357 -0
- sdg_hub/utils/json.py +48 -0
- sdg_hub/utils/models.py +31 -0
- sdg_hub/utils/parse_and_convert.py +392 -0
- sdg_hub/utils/taxonomy.py +489 -0
- sdg_hub-0.1.0a1.dist-info/METADATA +154 -0
- sdg_hub-0.1.0a1.dist-info/RECORD +94 -0
- sdg_hub-0.1.0a1.dist-info/WHEEL +5 -0
- sdg_hub-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- sdg_hub-0.1.0a1.dist-info/top_level.txt +1 -0
sdg_hub/flow.py
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
2
|
+
# Standard
|
3
|
+
from abc import ABC
|
4
|
+
from importlib import resources
|
5
|
+
from typing import Optional
|
6
|
+
import operator
|
7
|
+
import os
|
8
|
+
|
9
|
+
# Third Party
|
10
|
+
import yaml
|
11
|
+
|
12
|
+
# Local
|
13
|
+
from .registry import BlockRegistry, PromptRegistry
|
14
|
+
from . import prompts
|
15
|
+
from . import blocks
|
16
|
+
|
17
|
+
|
18
|
+
# Maps the operator names permitted in flow YAML files to the corresponding
# callables from the standard `operator` module; `get_flow_from_file` uses
# this to turn a filter block's `operation` string into a real function.
OPERATOR_MAP = {
    "operator.eq": operator.eq,
    "operator.ge": operator.ge,
    "operator.contains": operator.contains,
}

# Maps dtype names permitted in flow YAML files to the built-in conversion
# callables applied to a filter block's `convert_dtype` setting.
CONVERT_DTYPE_MAP = {
    "float": float,
    "int": int,
}
|
28
|
+
|
29
|
+
|
30
|
+
class Flow(ABC):
    """Load a flow definition from YAML and resolve it into runnable block specs.

    A flow YAML file is a list of block descriptions. This class rewires each
    entry in place: registry names become block classes, relative config paths
    become absolute paths, and operator/dtype names become callables.
    """

    def __init__(
        self,
        llm_client,
        num_samples_to_generate: Optional[int] = None,
    ) -> None:
        # Client injected into every LLM block found in the flow.
        self.llm_client = llm_client
        # When set, overrides the sample count on every LLM block.
        self.num_samples_to_generate = num_samples_to_generate
        # Package root; relative paths in flow files resolve against it.
        self.base_path = str(resources.files(__package__))
        self.registered_blocks = BlockRegistry.get_registry()

    def get_flow_from_file(self, yaml_path: str) -> list:
        """Parse a flow YAML file and return its list of resolved block specs.

        Parameters
        ----------
        yaml_path : str
            Path to the flow file, either absolute or relative to the package.

        Raises
        ------
        FileNotFoundError
            If the YAML file cannot be located.
        KeyError
            If a block type or model prompt is not present in its registry.
        """
        # Prefer the package-relative location when it exists.
        candidate = os.path.join(self.base_path, yaml_path)
        if os.path.isfile(candidate):
            yaml_path = candidate

        try:
            with open(yaml_path, "r", encoding="utf-8") as yaml_file:
                flow = yaml.safe_load(yaml_file)
        except FileNotFoundError as exc:
            raise FileNotFoundError(f"File not found: {yaml_path}") from exc

        # Rewrite each block description in place with concrete objects.
        for block in flow:
            config = block["block_config"]

            # LLM blocks need the client, a prompt template, and (optionally)
            # an overridden sample count wired in before instantiation. This
            # must run before block_type is swapped for a class below, since
            # the check relies on the type still being a string.
            if "LLM" in block["block_type"]:
                config["client"] = self.llm_client
                # Use an explicit model_prompt when given; otherwise fall back
                # to the registry entry whose key appears inside the model id.
                if config.get("model_prompt", None) is None:
                    matched_prompt = next(
                        (
                            key
                            for key in PromptRegistry.get_registry()
                            if key in config["model_id"]
                        ),
                        None,
                    )
                    if matched_prompt is None:
                        raise KeyError(
                            f"Prompt not found in registry: {config['model_id']}"
                        )
                    config["model_prompt"] = matched_prompt

                if self.num_samples_to_generate is not None:
                    block["num_samples"] = self.num_samples_to_generate

            # Swap the registry name for the actual block class.
            try:
                block["block_type"] = self.registered_blocks[block["block_type"]]
            except KeyError as exc:
                raise KeyError(
                    f"Block not found in registry: {block['block_type']}"
                ) from exc

            # Resolve a single relative config path to an absolute one,
            # keeping the original string when no package file matches.
            if "config_path" in config:
                resolved = os.path.join(self.base_path, config["config_path"])
                if os.path.isfile(resolved):
                    config["config_path"] = resolved

            # Same resolution for multiple config paths, which may arrive as
            # either a mapping or a list.
            if "config_paths" in config:
                paths = config["config_paths"]
                if isinstance(paths, dict):
                    for key, path in paths.items():
                        resolved = os.path.join(self.base_path, path)
                        if os.path.isfile(resolved):
                            paths[key] = resolved
                if isinstance(paths, list):
                    for i, path in enumerate(paths):
                        resolved = os.path.join(self.base_path, path)
                        if os.path.isfile(resolved):
                            paths[i] = resolved

            # Operator and dtype names from the YAML become real callables.
            if "operation" in config:
                config["operation"] = OPERATOR_MAP[config["operation"]]

            if "convert_dtype" in config:
                config["convert_dtype"] = CONVERT_DTYPE_MAP[config["convert_dtype"]]

        return flow
|
@@ -0,0 +1,19 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_responses
|
4
|
+
config_path: configs/annotations/detailed_description.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
max_tokens: 5
|
10
|
+
temperature: 0
|
11
|
+
extra_body:
|
12
|
+
guided_choice:
|
13
|
+
- "joy"
|
14
|
+
- "sadness"
|
15
|
+
- "anger"
|
16
|
+
- "fear"
|
17
|
+
- "love"
|
18
|
+
drop_duplicates:
|
19
|
+
- prompt
|
@@ -0,0 +1,19 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_responses
|
4
|
+
config_path: configs/annotations/detailed_description_icl.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
max_tokens: 5
|
10
|
+
temperature: 0
|
11
|
+
extra_body:
|
12
|
+
guided_choice:
|
13
|
+
- "joy"
|
14
|
+
- "sadness"
|
15
|
+
- "anger"
|
16
|
+
- "fear"
|
17
|
+
- "love"
|
18
|
+
drop_duplicates:
|
19
|
+
- prompt
|
@@ -0,0 +1,19 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_responses
|
4
|
+
config_path: configs/annotations/simple.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
max_tokens: 5
|
10
|
+
temperature: 0
|
11
|
+
extra_body:
|
12
|
+
guided_choice:
|
13
|
+
- "joy"
|
14
|
+
- "sadness"
|
15
|
+
- "anger"
|
16
|
+
- "fear"
|
17
|
+
- "love"
|
18
|
+
drop_duplicates:
|
19
|
+
- prompt
|
@@ -0,0 +1,13 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_mmlu_knowledge
|
4
|
+
config_path: configs/knowledge/mcq_generation.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- mmlubench_question
|
8
|
+
- mmlubench_answer
|
9
|
+
gen_kwargs:
|
10
|
+
temperature: 0
|
11
|
+
max_tokens: 2048
|
12
|
+
drop_duplicates:
|
13
|
+
- mmlubench_question
|
@@ -0,0 +1,12 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_knowledge
|
4
|
+
config_path: configs/knowledge/simple_generate_qa.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0.7
|
10
|
+
max_tokens: 2048
|
11
|
+
drop_duplicates:
|
12
|
+
- output
|
@@ -0,0 +1,89 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_knowledge
|
4
|
+
config_path: configs/knowledge/generate_questions_responses.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- question
|
8
|
+
- response
|
9
|
+
parser_kwargs:
|
10
|
+
parser_name: custom
|
11
|
+
parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
|
12
|
+
parser_cleanup_tags:
|
13
|
+
- "[END]"
|
14
|
+
gen_kwargs:
|
15
|
+
max_tokens: 2048
|
16
|
+
drop_duplicates:
|
17
|
+
- question
|
18
|
+
|
19
|
+
- block_type: LLMBlock
|
20
|
+
block_config:
|
21
|
+
block_name: eval_faithfulness_qa_pair
|
22
|
+
config_path: configs/knowledge/evaluate_faithfulness.yaml
|
23
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
24
|
+
output_cols:
|
25
|
+
- explanation
|
26
|
+
- judgment
|
27
|
+
gen_kwargs:
|
28
|
+
max_tokens: 2048
|
29
|
+
|
30
|
+
- block_type: FilterByValueBlock
|
31
|
+
block_config:
|
32
|
+
block_name: filter_faithfulness
|
33
|
+
filter_column: judgment
|
34
|
+
filter_value: "YES"
|
35
|
+
operation: operator.eq
|
36
|
+
batch_kwargs:
|
37
|
+
num_procs: 8
|
38
|
+
drop_columns:
|
39
|
+
- judgment
|
40
|
+
- explanation
|
41
|
+
|
42
|
+
- block_type: LLMBlock
|
43
|
+
block_config:
|
44
|
+
block_name: eval_relevancy_qa_pair
|
45
|
+
config_path: configs/knowledge/evaluate_relevancy.yaml
|
46
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
47
|
+
output_cols:
|
48
|
+
- feedback
|
49
|
+
- score
|
50
|
+
gen_kwargs:
|
51
|
+
max_tokens: 2048
|
52
|
+
|
53
|
+
- block_type: FilterByValueBlock
|
54
|
+
block_config:
|
55
|
+
block_name: filter_relevancy
|
56
|
+
filter_column: score
|
57
|
+
filter_value: 2.0
|
58
|
+
operation: operator.eq
|
59
|
+
convert_dtype: float
|
60
|
+
batch_kwargs:
|
61
|
+
num_procs: 8
|
62
|
+
drop_columns:
|
63
|
+
- feedback
|
64
|
+
- score
|
65
|
+
|
66
|
+
- block_type: LLMBlock
|
67
|
+
block_config:
|
68
|
+
block_name: eval_verify_question
|
69
|
+
config_path: configs/knowledge/evaluate_question.yaml
|
70
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
71
|
+
output_cols:
|
72
|
+
- explanation
|
73
|
+
- rating
|
74
|
+
gen_kwargs:
|
75
|
+
max_tokens: 2048
|
76
|
+
|
77
|
+
- block_type: FilterByValueBlock
|
78
|
+
block_config:
|
79
|
+
block_name: filter_verify_question
|
80
|
+
filter_column: rating
|
81
|
+
filter_value: 1.0
|
82
|
+
operation: operator.eq
|
83
|
+
convert_dtype: float
|
84
|
+
batch_kwargs:
|
85
|
+
num_procs: 8
|
86
|
+
drop_columns:
|
87
|
+
- explanation
|
88
|
+
- rating
|
89
|
+
- __index_level_0__
|
@@ -0,0 +1,136 @@
|
|
1
|
+
- block_type: DuplicateColumns
|
2
|
+
block_config:
|
3
|
+
block_name: duplicate_document_col
|
4
|
+
columns_map:
|
5
|
+
document: base_document
|
6
|
+
|
7
|
+
- block_type: LLMBlock
|
8
|
+
block_config:
|
9
|
+
block_name: gen_detailed_summary
|
10
|
+
config_path: configs/knowledge/detailed_summary.yaml
|
11
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
12
|
+
output_cols:
|
13
|
+
- summary_detailed
|
14
|
+
gen_kwargs:
|
15
|
+
max_tokens: 2048
|
16
|
+
|
17
|
+
- block_type: LLMBlock
|
18
|
+
block_config:
|
19
|
+
block_name: gen_atomic_facts
|
20
|
+
config_path: configs/knowledge/atomic_facts.yaml
|
21
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
22
|
+
output_cols:
|
23
|
+
- summary_atomic_facts
|
24
|
+
gen_kwargs:
|
25
|
+
max_tokens: 2048
|
26
|
+
|
27
|
+
- block_type: LLMBlock
|
28
|
+
block_config:
|
29
|
+
block_name: gen_extractive_summary
|
30
|
+
config_path: configs/knowledge/extractive_summary.yaml
|
31
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
32
|
+
output_cols:
|
33
|
+
- summary_extractive
|
34
|
+
gen_kwargs:
|
35
|
+
max_tokens: 2048
|
36
|
+
|
37
|
+
- block_type: FlattenColumnsBlock
|
38
|
+
block_config:
|
39
|
+
block_name: flatten_summary_columns
|
40
|
+
var_cols:
|
41
|
+
- summary_detailed
|
42
|
+
- summary_extractive
|
43
|
+
- summary_atomic_facts
|
44
|
+
- base_document
|
45
|
+
value_name: summary
|
46
|
+
var_name: dataset_type
|
47
|
+
|
48
|
+
- block_type: RenameColumns
|
49
|
+
block_config:
|
50
|
+
block_name: rename_to_document_column
|
51
|
+
columns_map:
|
52
|
+
document: raw_document
|
53
|
+
summary: document
|
54
|
+
|
55
|
+
- block_type: LLMBlock
|
56
|
+
block_config:
|
57
|
+
block_name: knowledge generation
|
58
|
+
config_path: configs/knowledge/generate_questions_responses.yaml
|
59
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
60
|
+
output_cols:
|
61
|
+
- question
|
62
|
+
- response
|
63
|
+
parser_kwargs:
|
64
|
+
parser_name: custom
|
65
|
+
parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
|
66
|
+
parser_cleanup_tags:
|
67
|
+
- "[END]"
|
68
|
+
gen_kwargs:
|
69
|
+
temperature: 0.0
|
70
|
+
max_tokens: 2048
|
71
|
+
|
72
|
+
- block_type: LLMBlock
|
73
|
+
block_config:
|
74
|
+
block_name: eval_faithfulness_qa_pair
|
75
|
+
config_path: configs/knowledge/evaluate_faithfulness.yaml
|
76
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
77
|
+
output_cols:
|
78
|
+
- explanation
|
79
|
+
- judgment
|
80
|
+
gen_kwargs:
|
81
|
+
max_tokens: 2048
|
82
|
+
|
83
|
+
- block_type: FilterByValueBlock
|
84
|
+
block_config:
|
85
|
+
block_name: filter_faithfulness
|
86
|
+
filter_column: judgment
|
87
|
+
filter_value: "YES"
|
88
|
+
operation: operator.eq
|
89
|
+
drop_columns:
|
90
|
+
- judgment
|
91
|
+
- explanation
|
92
|
+
|
93
|
+
- block_type: LLMBlock
|
94
|
+
block_config:
|
95
|
+
block_name: eval_relevancy_qa_pair
|
96
|
+
config_path: configs/knowledge/evaluate_relevancy.yaml
|
97
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
98
|
+
output_cols:
|
99
|
+
- feedback
|
100
|
+
- score
|
101
|
+
gen_kwargs:
|
102
|
+
max_tokens: 2048
|
103
|
+
|
104
|
+
- block_type: FilterByValueBlock
|
105
|
+
block_config:
|
106
|
+
block_name: filter_relevancy
|
107
|
+
filter_column: score
|
108
|
+
filter_value: 2.0
|
109
|
+
operation: operator.eq
|
110
|
+
convert_dtype: float
|
111
|
+
drop_columns:
|
112
|
+
- feedback
|
113
|
+
- score
|
114
|
+
|
115
|
+
- block_type: LLMBlock
|
116
|
+
block_config:
|
117
|
+
block_name: eval_verify_question
|
118
|
+
config_path: configs/knowledge/evaluate_question.yaml
|
119
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
120
|
+
output_cols:
|
121
|
+
- explanation
|
122
|
+
- rating
|
123
|
+
gen_kwargs:
|
124
|
+
max_tokens: 2048
|
125
|
+
|
126
|
+
- block_type: FilterByValueBlock
|
127
|
+
block_config:
|
128
|
+
block_name: filter_verify_question
|
129
|
+
filter_column: rating
|
130
|
+
filter_value: 1.0
|
131
|
+
operation: operator.eq
|
132
|
+
convert_dtype: float
|
133
|
+
drop_columns:
|
134
|
+
- explanation
|
135
|
+
- rating
|
136
|
+
- __index_level_0__
|
@@ -0,0 +1,108 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: router
|
4
|
+
config_path: configs/skills/router.yaml
|
5
|
+
model_id: skill-classifier-v3-clm
|
6
|
+
output_cols:
|
7
|
+
- route
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0
|
10
|
+
max_tokens: 1
|
11
|
+
extra_body:
|
12
|
+
allowed_token_ids:
|
13
|
+
- 32001
|
14
|
+
- 32002
|
15
|
+
- 32003
|
16
|
+
- 32004
|
17
|
+
- 32005
|
18
|
+
- 32006
|
19
|
+
- 32007
|
20
|
+
- 32008
|
21
|
+
- block_type: SamplePopulatorBlock
|
22
|
+
block_config:
|
23
|
+
block_name: icl_populator
|
24
|
+
config_paths:
|
25
|
+
- configs/skills/_A_.yaml
|
26
|
+
- configs/skills/_B_.yaml
|
27
|
+
- configs/skills/_C_.yaml
|
28
|
+
- configs/skills/_D_.yaml
|
29
|
+
- configs/skills/_E_.yaml
|
30
|
+
- configs/skills/_F_.yaml
|
31
|
+
- configs/skills/_G_.yaml
|
32
|
+
- configs/skills/_H_.yaml
|
33
|
+
column_name: route
|
34
|
+
batch_kwargs:
|
35
|
+
num_procs: 8
|
36
|
+
- block_type: LLMBlock
|
37
|
+
block_config:
|
38
|
+
block_name: analyzer
|
39
|
+
config_path: configs/skills/analyzer.yaml
|
40
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
41
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
42
|
+
output_cols:
|
43
|
+
- analysis
|
44
|
+
- rubric
|
45
|
+
- block_type: LLMBlock
|
46
|
+
block_config:
|
47
|
+
block_name: critic
|
48
|
+
config_path: configs/skills/critic.yaml
|
49
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
50
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
51
|
+
output_cols:
|
52
|
+
- critique
|
53
|
+
- block_type: LLMBlock
|
54
|
+
block_config:
|
55
|
+
block_name: planner
|
56
|
+
config_path: configs/skills/planner.yaml
|
57
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
58
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
59
|
+
output_cols:
|
60
|
+
- plan
|
61
|
+
- block_type: LLMBlock
|
62
|
+
block_config:
|
63
|
+
block_name: revised_responder
|
64
|
+
config_path: configs/skills/revised_responder.yaml
|
65
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
66
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
67
|
+
output_cols:
|
68
|
+
- revised_response
|
69
|
+
drop_columns:
|
70
|
+
- icl_query
|
71
|
+
- icl_response
|
72
|
+
- icl_analysis
|
73
|
+
- icl_rubric
|
74
|
+
- icl_critique
|
75
|
+
- icl_plan
|
76
|
+
- icl_revised_response
|
77
|
+
- block_type: LLMBlock
|
78
|
+
block_config:
|
79
|
+
block_name: judge
|
80
|
+
config_path: configs/skills/judge.yaml
|
81
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
82
|
+
model_prompt: <s> [INST] {prompt} [/INST]
|
83
|
+
output_cols:
|
84
|
+
- judgement
|
85
|
+
- verdict
|
86
|
+
- block_type: FilterByValueBlock
|
87
|
+
block_config:
|
88
|
+
block_name: filter_judgement
|
89
|
+
filter_column: verdict
|
90
|
+
filter_value:
|
91
|
+
- Assistant A
|
92
|
+
- Assistant B
|
93
|
+
operation: operator.contains
|
94
|
+
batch_kwargs:
|
95
|
+
num_procs: 8
|
96
|
+
- block_type: SelectorBlock
|
97
|
+
block_config:
|
98
|
+
block_name: response_selector
|
99
|
+
choice_map:
|
100
|
+
Assistant A: "response"
|
101
|
+
Assistant B: "revised_response"
|
102
|
+
choice_col: verdict
|
103
|
+
output_col: chosen_reponse
|
104
|
+
batch_kwargs:
|
105
|
+
num_procs: 8
|
106
|
+
drop_columns:
|
107
|
+
- judgement
|
108
|
+
- verdict
|
@@ -0,0 +1,12 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_skill_freeform
|
4
|
+
config_path: configs/skills/simple_generate_qa_freeform.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0.7
|
10
|
+
max_tokens: 2048
|
11
|
+
drop_duplicates:
|
12
|
+
- output
|
@@ -0,0 +1,12 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_skill_grounded
|
4
|
+
config_path: configs/skills/simple_generate_qa_grounded.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- output
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0.7
|
10
|
+
max_tokens: 2048
|
11
|
+
drop_duplicates:
|
12
|
+
- output
|
@@ -0,0 +1,80 @@
|
|
1
|
+
- block_type: LLMBlock
|
2
|
+
block_config:
|
3
|
+
block_name: gen_contexts
|
4
|
+
config_path: configs/skills/contexts.yaml
|
5
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
6
|
+
output_cols:
|
7
|
+
- context
|
8
|
+
gen_kwargs:
|
9
|
+
temperature: 0.7
|
10
|
+
max_tokens: 2048
|
11
|
+
n: 10
|
12
|
+
seed: 42
|
13
|
+
drop_duplicates:
|
14
|
+
- context
|
15
|
+
- block_type: LLMBlock
|
16
|
+
block_config:
|
17
|
+
block_name: gen_grounded_questions
|
18
|
+
config_path: configs/skills/grounded_questions.yaml
|
19
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
20
|
+
output_cols:
|
21
|
+
- question
|
22
|
+
batch_kwargs:
|
23
|
+
num_samples: 3
|
24
|
+
drop_duplicates:
|
25
|
+
- question
|
26
|
+
- block_type: LLMBlock
|
27
|
+
block_config:
|
28
|
+
block_name: eval_grounded_questions
|
29
|
+
config_path: configs/skills/evaluate_grounded_questions.yaml
|
30
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
31
|
+
output_cols:
|
32
|
+
- evaluation
|
33
|
+
- score
|
34
|
+
- block_type: FilterByValueBlock
|
35
|
+
block_config:
|
36
|
+
block_name: filter_grounded_questions
|
37
|
+
filter_column: score
|
38
|
+
filter_value: 1.0
|
39
|
+
operation: operator.eq
|
40
|
+
convert_dtype: float
|
41
|
+
batch_kwargs:
|
42
|
+
num_procs: 8
|
43
|
+
drop_columns:
|
44
|
+
- evaluation
|
45
|
+
- score
|
46
|
+
- num_samples
|
47
|
+
- block_type: LLMBlock
|
48
|
+
block_config:
|
49
|
+
block_name: gen_grounded_responses
|
50
|
+
config_path: configs/skills/grounded_responses.yaml
|
51
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
52
|
+
output_cols:
|
53
|
+
- response
|
54
|
+
- block_type: LLMBlock
|
55
|
+
block_config:
|
56
|
+
block_name: evaluate_grounded_qa_pair
|
57
|
+
config_path: configs/skills/evaluate_grounded_pair.yaml
|
58
|
+
model_id: mistralai/Mixtral-8x7B-Instruct-v0.1
|
59
|
+
output_cols:
|
60
|
+
- evaluation
|
61
|
+
- score
|
62
|
+
- block_type: FilterByValueBlock
|
63
|
+
block_config:
|
64
|
+
block_name: filter_grounded_qa_pair
|
65
|
+
filter_column: score
|
66
|
+
filter_value: 2.0
|
67
|
+
operation: operator.ge
|
68
|
+
convert_dtype: float
|
69
|
+
batch_kwargs:
|
70
|
+
num_procs: 8
|
71
|
+
- block_type: CombineColumnsBlock
|
72
|
+
block_config:
|
73
|
+
block_name: combine_question_and_context
|
74
|
+
columns:
|
75
|
+
- context
|
76
|
+
- question
|
77
|
+
output_col: question
|
78
|
+
batch_kwargs:
|
79
|
+
num_procs: 8
|
80
|
+
batched: True
|