PyPI - sdg-hub - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

sdg-hub 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

sdg_hub/_version.py +16 -3
sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +175 -416
sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +174 -415
sdg_hub/core/blocks/evaluation/verify_question_block.py +180 -415
sdg_hub/core/blocks/llm/__init__.py +2 -0
sdg_hub/core/blocks/llm/client_manager.py +61 -24
sdg_hub/core/blocks/llm/config.py +1 -0
sdg_hub/core/blocks/llm/llm_chat_block.py +62 -7
sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +653 -0
sdg_hub/core/blocks/llm/text_parser_block.py +75 -30
sdg_hub/core/blocks/registry.py +49 -35
sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
sdg_hub/core/flow/base.py +370 -20
sdg_hub/core/flow/checkpointer.py +333 -0
sdg_hub/core/flow/metadata.py +45 -0
sdg_hub/core/flow/migration.py +12 -1
sdg_hub/core/flow/registry.py +121 -58
sdg_hub/core/flow/validation.py +12 -0
sdg_hub/core/utils/__init__.py +2 -1
sdg_hub/core/utils/datautils.py +81 -1
sdg_hub/core/utils/flow_id_words.yaml +231 -0
sdg_hub/core/utils/flow_identifier.py +94 -0
sdg_hub/core/utils/yaml_utils.py +59 -0
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +1 -7
{sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/METADATA +59 -31
{sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/RECORD +30 -25
{sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/WHEEL +0 -0
{sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/licenses/LICENSE +0 -0
{sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/top_level.txt +0 -0

sdg_hub/core/blocks/llm/text_parser_block.py CHANGED Viewed

@@ -48,6 +48,9 @@ class TextParserBlock(BaseBlock):
         Regex pattern for custom parsing.
     parser_cleanup_tags : Optional[List[str]]
         List of tags to clean from parsed output.
+    expand_lists : bool
+        Whether to expand list inputs into individual rows (True) or preserve lists (False).
+        Default is True for backward compatibility.
     """
     start_tags: list[str] = Field(
@@ -62,6 +65,10 @@ class TextParserBlock(BaseBlock):
     parser_cleanup_tags: Optional[list[str]] = Field(
         default=None, description="List of tags to clean from parsed output"
     )
+    expand_lists: bool = Field(
+        default=True,
+        description="Whether to expand list inputs into individual rows (True) or preserve lists (False). ",
+    )
     @field_validator("start_tags", "end_tags", mode="before")
     @classmethod
@@ -237,36 +244,74 @@ class TextParserBlock(BaseBlock):
                 logger.warning(f"Input column '{input_column}' contains empty list")
                 return []
-            all_results = []
-            for i, response in enumerate(raw_output):
-                if not response or not isinstance(response, str):
-                    logger.warning(
-                        f"List item {i} in column '{input_column}' contains invalid data "
-                        f"(empty or non-string): {type(response)}"
-                    )
-                    continue
-                parsed_outputs = self._parse(response)
-                if not parsed_outputs or not any(
-                    len(value) > 0 for value in parsed_outputs.values()
-                ):
-                    logger.warning(
-                        f"Failed to parse content from list item {i}. Raw output length: {len(response)}, "
-                        f"parsing method: {'regex' if self.parsing_pattern else 'tags'}"
-                    )
-                    continue
-                # Create output rows for this response
-                max_length = max(len(value) for value in parsed_outputs.values())
-                for values in zip(
-                    *(lst[:max_length] for lst in parsed_outputs.values())
-                ):
-                    all_results.append(
-                        {**sample, **dict(zip(parsed_outputs.keys(), values))}
-                    )
-            return all_results
+            if not self.expand_lists:
+                # When expand_lists=False, preserve the list structure
+                # Parse each response in the list and collect results as lists
+                all_parsed_outputs = {col: [] for col in self.output_cols}
+                valid_responses = 0
+                for i, response in enumerate(raw_output):
+                    if not response or not isinstance(response, str):
+                        logger.warning(
+                            f"List item {i} in column '{input_column}' contains invalid data "
+                            f"(empty or non-string): {type(response)}"
+                        )
+                        continue
+                    parsed_outputs = self._parse(response)
+                    if not parsed_outputs or not any(
+                        len(value) > 0 for value in parsed_outputs.values()
+                    ):
+                        logger.warning(
+                            f"Failed to parse content from list item {i}. Raw output length: {len(response)}, "
+                            f"parsing method: {'regex' if self.parsing_pattern else 'tags'}"
+                        )
+                        continue
+                    valid_responses += 1
+                    # Collect all parsed values for each column as lists
+                    for col in self.output_cols:
+                        all_parsed_outputs[col].extend(parsed_outputs.get(col, []))
+                if valid_responses == 0:
+                    return []
+                # Return single row with lists as values
+                return [{**sample, **all_parsed_outputs}]
+            else:
+                # When expand_lists=True, use existing expanding behavior
+                all_results = []
+                for i, response in enumerate(raw_output):
+                    if not response or not isinstance(response, str):
+                        logger.warning(
+                            f"List item {i} in column '{input_column}' contains invalid data "
+                            f"(empty or non-string): {type(response)}"
+                        )
+                        continue
+                    parsed_outputs = self._parse(response)
+                    if not parsed_outputs or not any(
+                        len(value) > 0 for value in parsed_outputs.values()
+                    ):
+                        logger.warning(
+                            f"Failed to parse content from list item {i}. Raw output length: {len(response)}, "
+                            f"parsing method: {'regex' if self.parsing_pattern else 'tags'}"
+                        )
+                        continue
+                    # Create output rows for this response
+                    max_length = max(len(value) for value in parsed_outputs.values())
+                    for values in zip(
+                        *(lst[:max_length] for lst in parsed_outputs.values())
+                    ):
+                        all_results.append(
+                            {**sample, **dict(zip(parsed_outputs.keys(), values))}
+                        )
+                return all_results
         # Handle string inputs (existing logic)
         elif isinstance(raw_output, str):

sdg_hub/core/blocks/registry.py CHANGED Viewed

@@ -164,8 +164,10 @@ class BlockRegistry:
                 ) from exc
     @classmethod
-    def get(cls, block_name: str) -> type:
-        """Get a block class with enhanced error handling.
+    def _get(cls, block_name: str) -> type:
+        """Internal method to get a block class with enhanced error handling.
+        This is a private method used by the framework internals (Flow system).
         Parameters
         ----------
@@ -216,29 +218,6 @@ class BlockRegistry:
         return metadata.block_class
-    @classmethod
-    def info(cls, block_name: str) -> BlockMetadata:
-        """Get metadata for a specific block.
-        Parameters
-        ----------
-        block_name : str
-            Name of the block.
-        Returns
-        -------
-        BlockMetadata
-            The block's metadata.
-        Raises
-        ------
-        KeyError
-            If the block is not found.
-        """
-        if block_name not in cls._metadata:
-            raise KeyError(f"Block '{block_name}' not found in registry.")
-        return cls._metadata[block_name]
     @classmethod
     def categories(cls) -> list[str]:
         """Get all available categories.
@@ -251,8 +230,8 @@ class BlockRegistry:
         return sorted(cls._categories.keys())
     @classmethod
-    def category(cls, category: str) -> list[str]:
-        """Get all blocks in a specific category.
+    def _get_category_blocks(cls, category: str) -> list[str]:
+        """Get all blocks in a specific category (private method).
         Parameters
         ----------
@@ -278,20 +257,55 @@ class BlockRegistry:
         return sorted(cls._categories[category])
     @classmethod
-    def all(cls) -> dict[str, list[str]]:
-        """List all blocks organized by category.
+    def list_blocks(
+        cls,
+        category: Optional[str] = None,
+        *,
+        grouped: bool = False,
+        include_deprecated: bool = True,
+    ) -> list[str] | dict[str, list[str]]:
+        """
+        List registered blocks, optionally filtered by category.
+        Args:
+            category: If provided, return only blocks in this category.
+            grouped: If True (and category is None), return a dict
+                    mapping categories to lists of blocks.
+            include_deprecated: If True, return deprecated blocks.
         Returns
         -------
-        Dict[str, List[str]]
-            Dictionary mapping categories to lists of block names.
+        List[str] | Dict[str, List[str]]
+            If grouped is False, returns a list of block names.
+            If grouped is True, returns a dict mapping categories to lists of block names.
         """
-        return {
-            category: sorted(blocks) for category, blocks in cls._categories.items()
-        }
+        def filter_deprecated(block_names: list[str]) -> list[str]:
+            if include_deprecated:
+                return block_names
+            return [name for name in block_names if not cls._metadata[name].deprecated]
+        if category:
+            block_names = cls._get_category_blocks(category)
+            return filter_deprecated(block_names)
+        if grouped:
+            result = {}
+            for cat, blocks in cls._categories.items():
+                filtered = filter_deprecated(sorted(blocks))
+                if filtered:
+                    result[cat] = filtered
+            return result
+        # Flat list of all block names (across all categories)
+        all_block_names = []
+        for blocks in cls._categories.values():
+            all_block_names.extend(blocks)
+        filtered = filter_deprecated(sorted(all_block_names))
+        return filtered
     @classmethod
-    def show(cls) -> None:
+    def discover_blocks(cls) -> None:
         """Print a Rich-formatted table of all available blocks."""
         if not cls._metadata:
             console.print("[yellow]No blocks registered yet.[/yellow]")

sdg_hub/core/blocks/transform/index_based_mapper.py CHANGED Viewed

@@ -174,7 +174,7 @@ class IndexBasedMapperBlock(BaseBlock):
             sample[output_col] = sample[source_col]
         return sample
-    def generate(self, samples: Dataset) -> Dataset:
+    def generate(self, samples: Dataset, **kwargs) -> Dataset:
         """Generate a new dataset with selected values.
         Parameters

sdg-hub 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

sdg-hub 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl