PyPI - sdg-hub - Versions diffs - 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

sdg-hub 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

sdg_hub/core/flow/base.py CHANGED Viewed

@@ -35,7 +35,7 @@ from ..utils.logger_config import setup_logger
 from ..utils.path_resolution import resolve_path
 from ..utils.yaml_utils import save_flow_yaml
 from .checkpointer import FlowCheckpointer
-from .metadata import DatasetRequirements, FlowMetadata, FlowParameter
+from .metadata import DatasetRequirements, FlowMetadata
 from .migration import FlowMigration
 from .validation import FlowValidator
@@ -55,8 +55,6 @@ class Flow(BaseModel):
         Ordered list of blocks to execute in the flow.
     metadata : FlowMetadata
         Flow metadata including name, version, author, etc.
-    parameters : Dict[str, FlowParameter]
-        Runtime parameters that can be overridden during execution.
     """
     blocks: list[BaseBlock] = Field(
@@ -66,10 +64,6 @@ class Flow(BaseModel):
     metadata: FlowMetadata = Field(
         description="Flow metadata including name, version, author, etc."
     )
-    parameters: dict[str, FlowParameter] = Field(
-        default_factory=dict,
-        description="Runtime parameters that can be overridden during execution",
-    )
     model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
@@ -96,32 +90,6 @@ class Flow(BaseModel):
         return v
-    @field_validator("parameters")
-    @classmethod
-    def validate_parameters(
-        cls, v: dict[str, FlowParameter]
-    ) -> dict[str, FlowParameter]:
-        """Validate parameter names and ensure they are FlowParameter instances."""
-        if not v:
-            return v
-        validated = {}
-        for param_name, param_value in v.items():
-            if not isinstance(param_name, str) or not param_name.strip():
-                raise ValueError(
-                    f"Parameter name must be a non-empty string: {param_name}"
-                )
-            if not isinstance(param_value, FlowParameter):
-                raise ValueError(
-                    f"Parameter '{param_name}' must be a FlowParameter instance, "
-                    f"got: {type(param_value)}"
-                )
-            validated[param_name.strip()] = param_value
-        return validated
     @model_validator(mode="after")
     def validate_block_names_unique(self) -> "Flow":
         """Ensure all block names are unique within the flow."""
@@ -215,17 +183,6 @@ class Flow(BaseModel):
         except Exception as exc:
             raise FlowValidationError(f"Invalid metadata configuration: {exc}") from exc
-        # Extract and validate parameters
-        parameters = {}
-        params_dict = flow_config.get("parameters", {})
-        for param_name, param_config in params_dict.items():
-            try:
-                parameters[param_name] = FlowParameter(**param_config)
-            except Exception as exc:
-                raise FlowValidationError(
-                    f"Invalid parameter '{param_name}': {exc}"
-                ) from exc
         # Create blocks with validation
         blocks = []
         block_configs = flow_config.get("blocks", [])
@@ -254,7 +211,7 @@ class Flow(BaseModel):
         # Create and validate the flow
         try:
-            flow = cls(blocks=blocks, metadata=metadata, parameters=parameters)
+            flow = cls(blocks=blocks, metadata=metadata)
             # Persist generated id back to the YAML file (only on initial load)
             # If the file had no metadata.id originally, update and rewrite
             if not flow_config.get("metadata", {}).get("id"):
@@ -877,16 +834,19 @@ class Flow(BaseModel):
                             f"Block '{block.block_name}': {param_name} "
                             f"'{old_value}' -> '{param_value}'"
                         )
+                    ## check if allow extra
+                    elif block.model_config["extra"] == "allow":
+                        setattr(block, param_name, param_value)
+                        logger.debug(
+                            f"Block '{block.block_name}': {param_name} "
+                            f"'{old_value}' -> '{param_value}'"
+                        )
                     else:
                         logger.warning(
                             f"Block '{block.block_name}' ({block.__class__.__name__}) "
                             f"does not have attribute '{param_name}' - skipping"
                         )
-                # Reinitialize client manager for LLM blocks after updating config
-                if hasattr(block, "_reinitialize_client_manager"):
-                    block._reinitialize_client_manager()
                 modified_count += 1
         if modified_count > 0:
@@ -1222,17 +1182,12 @@ class Flow(BaseModel):
         # Create new flow with added block
         new_blocks = self.blocks + [block]
-        return Flow(
-            blocks=new_blocks, metadata=self.metadata, parameters=self.parameters
-        )
+        return Flow(blocks=new_blocks, metadata=self.metadata)
     def get_info(self) -> dict[str, Any]:
         """Get information about the flow."""
         return {
             "metadata": self.metadata.model_dump(),
-            "parameters": {
-                name: param.model_dump() for name, param in self.parameters.items()
-            },
             "blocks": [
                 {
                     "block_type": block.__class__.__name__,
@@ -1336,8 +1291,7 @@ class Flow(BaseModel):
         The summary contains:
         1. Flow metadata (name, version, author, description)
-        2. Defined runtime parameters with type hints and defaults
-        3. A table of all blocks with their input and output columns
+        2. A table of all blocks with their input and output columns
         Notes
         -----
@@ -1371,17 +1325,6 @@ class Flow(BaseModel):
                 f"Description: [white]{self.metadata.description}[/white]"
             )
-        # Parameters section
-        if self.parameters:
-            params_branch = flow_tree.add(
-                "[bold bright_yellow]Parameters[/bold bright_yellow]"
-            )
-            for name, param in self.parameters.items():
-                param_info = f"[bright_cyan]{name}[/bright_cyan]: [white]{param.type_hint}[/white]"
-                if param.default is not None:
-                    param_info += f" = [bright_white]{param.default}[/bright_white]"
-                params_branch.add(param_info)
         # Blocks overview
         flow_tree.add(
             f"[bold bright_magenta]Blocks[/bold bright_magenta] ({len(self.blocks)} total)"
@@ -1443,11 +1386,6 @@ class Flow(BaseModel):
             ],
         }
-        if self.parameters:
-            config["parameters"] = {
-                name: param.model_dump() for name, param in self.parameters.items()
-            }
         save_flow_yaml(output_path, config)
     def __len__(self) -> int:

sdg_hub/core/flow/metadata.py CHANGED Viewed

@@ -2,9 +2,8 @@
 """Flow metadata and parameter definitions."""
 # Standard
-from datetime import datetime
 from enum import Enum
-from typing import Any, Optional
+from typing import Optional
 # Third Party
 from pydantic import BaseModel, Field, field_validator, model_validator
@@ -118,39 +117,6 @@ class RecommendedModels(BaseModel):
         return None
-class FlowParameter(BaseModel):
-    """Represents a runtime parameter for a flow.
-    Attributes
-    ----------
-    default : Any
-        Default value for the parameter.
-    description : str
-        Human-readable description of the parameter.
-    type_hint : str
-        Type hint as string (e.g., "float", "str").
-    required : bool
-        Whether this parameter is required at runtime.
-    constraints : Dict[str, Any]
-        Additional constraints for the parameter.
-    """
-    default: Any = Field(..., description="Default value for the parameter")
-    description: str = Field(default="", description="Human-readable description")
-    type_hint: str = Field(default="Any", description="Type hint as string")
-    required: bool = Field(default=False, description="Whether parameter is required")
-    constraints: dict[str, Any] = Field(
-        default_factory=dict, description="Additional constraints for the parameter"
-    )
-    @model_validator(mode="after")
-    def validate_required_default(self) -> "FlowParameter":
-        """Validate that required parameters have appropriate defaults."""
-        if self.required and self.default is None:
-            raise ValueError("Required parameters cannot have None as default")
-        return self
 class DatasetRequirements(BaseModel):
     """Dataset requirements for flow execution.
@@ -255,20 +221,10 @@ class FlowMetadata(BaseModel):
         Simplified recommended models structure with default, compatible, and experimental lists.
     tags : List[str]
         Tags for categorization and search.
-    created_at : str
-        Creation timestamp.
-    updated_at : str
-        Last update timestamp.
     license : str
         License identifier.
-    min_sdg_hub_version : str
-        Minimum required SDG Hub version.
     dataset_requirements : Optional[DatasetRequirements]
         Requirements for input datasets.
-    estimated_cost : str
-        Estimated cost tier for running the flow.
-    estimated_duration : str
-        Estimated duration for flow execution.
     """
     name: str = Field(..., min_length=1, description="Human-readable name")
@@ -288,29 +244,10 @@ class FlowMetadata(BaseModel):
     tags: list[str] = Field(
         default_factory=list, description="Tags for categorization and search"
     )
-    created_at: str = Field(
-        default_factory=lambda: datetime.now().isoformat(),
-        description="Creation timestamp",
-    )
-    updated_at: str = Field(
-        default_factory=lambda: datetime.now().isoformat(),
-        description="Last update timestamp",
-    )
     license: str = Field(default="Apache-2.0", description="License identifier")
-    min_sdg_hub_version: str = Field(
-        default="", description="Minimum required SDG Hub version"
-    )
     dataset_requirements: Optional[DatasetRequirements] = Field(
         default=None, description="Requirements for input datasets"
     )
-    estimated_cost: str = Field(
-        default="medium",
-        pattern="^(low|medium|high)$",
-        description="Estimated cost tier for running the flow",
-    )
-    estimated_duration: str = Field(
-        default="", description="Estimated duration for flow execution"
-    )
     @field_validator("id")
     @classmethod
@@ -352,10 +289,6 @@ class FlowMetadata(BaseModel):
         # Validation is handled within RecommendedModels class
         return v
-    def update_timestamp(self) -> None:
-        """Update the updated_at timestamp."""
-        self.updated_at = datetime.now().isoformat()
     @model_validator(mode="after")
     def ensure_id(self) -> "FlowMetadata":
         """Ensure id is set.

sdg_hub/core/flow/registry.py CHANGED Viewed

@@ -360,7 +360,6 @@ class FlowRegistry:
                     "tags": ", ".join(metadata.tags) if metadata.tags else "-",
                     "description": metadata.description or "No description",
                     "version": metadata.version,
-                    "cost": metadata.estimated_cost,
                 }
             )

sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml CHANGED Viewed

@@ -17,7 +17,6 @@ metadata:
   - qa-pairs
   - detailed-summaries
   license: Apache-2.0
-  min_sdg_hub_version: 0.2.0
   dataset_requirements:
     required_columns:
     - document
@@ -61,10 +60,16 @@ blocks:
     temperature: 0.7
     n: 50
     async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_detailed_summary
+    input_cols: raw_summary
+    extract_content: true
+    expand_lists: true
 - block_type: TextParserBlock
   block_config:
     block_name: parse_detailed_summary
-    input_cols: raw_summary
+    input_cols: extract_detailed_summary_content
     output_cols: summary
     start_tags:
     - ''
@@ -99,10 +104,16 @@ blocks:
     temperature: 0.7
     n: 1
     async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_questions
+    input_cols: question_list
+    extract_content: true
+    expand_lists: true
 - block_type: TextParserBlock
   block_config:
     block_name: parse_question_list
-    input_cols: question_list
+    input_cols: extract_questions_content
     output_cols: question
     start_tags:
     - '[QUESTION]'
@@ -127,33 +138,61 @@ blocks:
     temperature: 0.7
     n: 1
     async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_answers
+    input_cols: response_dict
+    extract_content: true
+    expand_lists: true
 - block_type: TextParserBlock
   block_config:
     block_name: parse_response_dict
-    input_cols: response_dict
+    input_cols: extract_answers_content
     output_cols: response
     start_tags:
     - ''
     end_tags:
     - ''
     save_reasoning_content: true
-- block_type: EvaluateFaithfulnessBlock
+- block_type: PromptBuilderBlock
   block_config:
-    block_name: eval_faithfulness
+    block_name: eval_faithful_prompt
     input_cols:
     - document
     - response
-    output_cols:
-    - faithfulness_explanation
-    - faithfulness_judgment
+    output_cols: eval_faithful_prompt
     prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
-    filter_value: 'YES'
-    operation: eq
-    async_mode: true
     format_as_messages: true
+- block_type: LLMChatBlock
+  block_config:
+    block_name: eval_faithful_llm_chat
+    input_cols: eval_faithful_prompt
+    output_cols: eval_faithful_response_dict
+    n: 1
+    async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_eval_faithful
+    input_cols: eval_faithful_response_dict
+    extract_content: true
+- block_type: TextParserBlock
+  block_config:
+    block_name: parse_eval_faithful
+    input_cols: extract_eval_faithful_content
+    output_cols:
+    - faithfulness_explanation
+    - faithfulness_judgment
     start_tags:
     - '[Start of Explanation]'
     - '[Start of Answer]'
     end_tags:
     - '[End of Explanation]'
     - '[End of Answer]'
+- block_type: ColumnValueFilterBlock
+  block_config:
+    block_name: eval_faithful_filter
+    input_cols:
+      - faithfulness_judgment
+    filter_value: 'YES'
+    operation: eq

sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py ADDED Viewed

File without changes

sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml ADDED Viewed

@@ -0,0 +1,158 @@
+metadata:
+  name: Document Based Knowledge Tuning Dataset Generation Flow
+  description: Directly generates QA pairs from the raw document.
+  version: 2.0.0
+  author: SDG Hub Contributors
+  recommended_models:
+    default: openai/gpt-oss-120b
+    compatible:
+    - meta-llama/Llama-3.3-70B-Instruct
+    - microsoft/phi-4
+    - mistralai/Mixtral-8x7B-Instruct-v0.1
+    experimental: []
+  tags:
+  - knowledge-tuning
+  - document-internalization
+  - question-generation
+  - qa-pairs
+  - detailed-summaries
+  license: Apache-2.0
+  dataset_requirements:
+    required_columns:
+    - document
+    - document_outline
+    - domain
+    - icl_document
+    - icl_query_1
+    - icl_query_2
+    - icl_query_3
+    description: 'Input dataset should contain documents with text content and domain classification. Each document should be substantial enough for meaningful question generation (minimum 100 words recommended). The flow generates three types
+      of summaries: detailed (n=20), extractive (n=10), and key facts (n=50), each producing corresponding QA pairs designed to help LLMs internalize document knowledge for knowledge tuning.'
+  output_columns:
+  - question
+  - response
+  - raw_document
+  - faithfulness_explanation
+  - faithfulness_judgment
+  id: stellar-peak-605
+blocks:
+- block_type: DuplicateColumnsBlock
+  block_config:
+    block_name: duplicate_document_col
+    input_cols:
+      document: base_document
+- block_type: PromptBuilderBlock
+  block_config:
+    block_name: question_generation_prompt
+    input_cols:
+    - domain
+    - document
+    - document_outline
+    - icl_document
+    - icl_query_1
+    - icl_query_2
+    - icl_query_3
+    output_cols: question_generation_prompt
+    prompt_config_path: ../generate_question_list.yaml
+    format_as_messages: true
+- block_type: LLMChatBlock
+  block_config:
+    block_name: question_generation
+    input_cols: question_generation_prompt
+    output_cols: question_list
+    max_tokens: 256
+    temperature: 1.0
+    n: 1
+    async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_questions
+    input_cols: question_list
+    extract_content: true
+    expand_lists: true
+- block_type: TextParserBlock
+  block_config:
+    block_name: parse_question_list
+    input_cols: extract_questions_content
+    output_cols: question
+    start_tags:
+    - '[QUESTION]'
+    end_tags:
+    - '[END]'
+- block_type: PromptBuilderBlock
+  block_config:
+    block_name: answer_generation_prompt
+    input_cols:
+    - question
+    - document
+    - document_outline
+    output_cols: answer_generation_prompt
+    prompt_config_path: ../generate_answers.yaml
+    format_as_messages: true
+- block_type: LLMChatBlock
+  block_config:
+    block_name: answer_generation
+    input_cols: answer_generation_prompt
+    output_cols: response_dict
+    max_tokens: 4096
+    temperature: 1.0
+    n: 1
+    async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_answer
+    input_cols: response_dict
+    extract_content: true
+    expand_lists: true
+- block_type: TextParserBlock
+  block_config:
+    block_name: parse_response_dict
+    input_cols: extract_answer_content
+    output_cols: response
+    start_tags:
+    - ''
+    end_tags:
+    - ''
+    save_reasoning_content: true
+- block_type: PromptBuilderBlock
+  block_config:
+    block_name: eval_faithful_prompt
+    input_cols:
+    - document
+    - response
+    output_cols: eval_faithful_prompt
+    prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
+    format_as_messages: true
+- block_type: LLMChatBlock
+  block_config:
+    block_name: eval_faithful_llm_chat
+    input_cols: eval_faithful_prompt
+    output_cols: eval_faithful_response_dict
+    n: 1
+    async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_eval_faithful
+    input_cols: eval_faithful_response_dict
+    extract_content: true
+- block_type: TextParserBlock
+  block_config:
+    block_name: parse_eval_faithful
+    input_cols: extract_eval_faithful_content
+    output_cols:
+    - faithfulness_explanation
+    - faithfulness_judgment
+    start_tags:
+    - '[Start of Explanation]'
+    - '[Start of Answer]'
+    end_tags:
+    - '[End of Explanation]'
+    - '[End of Answer]'
+- block_type: ColumnValueFilterBlock
+  block_config:
+    block_name: eval_faithful_filter
+    input_cols:
+      - faithfulness_judgment
+    filter_value: 'YES'
+    operation: eq

sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml CHANGED Viewed

@@ -19,7 +19,6 @@ metadata:
   - qa-pairs
   - extractive-summaries
   license: Apache-2.0
-  min_sdg_hub_version: 0.2.0
   dataset_requirements:
     required_columns:
     - document
@@ -63,10 +62,16 @@ blocks:
     temperature: 0.7
     n: 50
     async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_extractive_summary
+    input_cols: raw_summary
+    extract_content: true
+    expand_lists: true
 - block_type: TextParserBlock
   block_config:
     block_name: parse_extractive_summary
-    input_cols: raw_summary
+    input_cols: extract_extractive_summary_content
     output_cols: summary
     start_tags:
     - ''
@@ -101,10 +106,16 @@ blocks:
     temperature: 0.7
     n: 1
     async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_questions
+    input_cols: question_list
+    extract_content: true
+    expand_lists: true
 - block_type: TextParserBlock
   block_config:
     block_name: parse_question_list
-    input_cols: question_list
+    input_cols: extract_questions_content
     output_cols: question
     start_tags:
     - '[QUESTION]'
@@ -129,33 +140,61 @@ blocks:
     temperature: 0.7
     n: 1
     async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_answers
+    input_cols: response_dict
+    extract_content: true
+    expand_lists: true
 - block_type: TextParserBlock
   block_config:
     block_name: parse_response_dict
-    input_cols: response_dict
+    input_cols: extract_answers_content
     output_cols: response
     start_tags:
     - ''
     end_tags:
     - ''
     save_reasoning_content: true
-- block_type: EvaluateFaithfulnessBlock
+- block_type: PromptBuilderBlock
   block_config:
-    block_name: eval_faithfulness
+    block_name: eval_faithful_prompt
     input_cols:
     - document
     - response
-    output_cols:
-    - faithfulness_explanation
-    - faithfulness_judgment
+    output_cols: eval_faithful_prompt
     prompt_config_path: ../../multi_summary_qa/instructlab/evaluate_faithfulness.yaml
-    filter_value: 'YES'
-    operation: eq
-    async_mode: true
     format_as_messages: true
+- block_type: LLMChatBlock
+  block_config:
+    block_name: eval_faithful_llm_chat
+    input_cols: eval_faithful_prompt
+    output_cols: eval_faithful_response_dict
+    n: 1
+    async_mode: true
+- block_type: LLMParserBlock
+  block_config:
+    block_name: extract_eval_faithful
+    input_cols: eval_faithful_response_dict
+    extract_content: true
+- block_type: TextParserBlock
+  block_config:
+    block_name: parse_eval_faithful
+    input_cols: extract_eval_faithful_content
+    output_cols:
+    - faithfulness_explanation
+    - faithfulness_judgement
     start_tags:
     - '[Start of Explanation]'
     - '[Start of Answer]'
     end_tags:
     - '[End of Explanation]'
     - '[End of Answer]'
+- block_type: ColumnValueFilterBlock
+  block_config:
+    block_name: eval_faithful_filter
+    input_cols:
+      - faithfulness_judgement
+    filter_value: 'YES'
+    operation: eq

sdg-hub 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl

sdg-hub 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl