sdg-hub 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. sdg_hub/__init__.py +0 -2
  2. sdg_hub/_version.py +2 -2
  3. sdg_hub/core/__init__.py +1 -2
  4. sdg_hub/core/blocks/__init__.py +2 -4
  5. sdg_hub/core/blocks/base.py +61 -6
  6. sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
  7. sdg_hub/core/blocks/llm/__init__.py +2 -4
  8. sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
  9. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
  10. sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
  11. sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
  12. sdg_hub/core/flow/__init__.py +3 -4
  13. sdg_hub/core/flow/base.py +11 -73
  14. sdg_hub/core/flow/metadata.py +1 -68
  15. sdg_hub/core/flow/registry.py +0 -1
  16. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -12
  17. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
  18. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +158 -0
  19. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -12
  20. sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -3
  21. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +147 -28
  22. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
  23. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
  24. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
  25. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
  26. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
  27. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +303 -0
  28. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
  29. sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -5
  30. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/METADATA +2 -1
  31. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/RECORD +34 -30
  32. sdg_hub/core/blocks/evaluation/__init__.py +0 -9
  33. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
  34. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
  35. sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
  36. sdg_hub/core/blocks/llm/client_manager.py +0 -472
  37. sdg_hub/core/blocks/llm/config.py +0 -337
  38. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/WHEEL +0 -0
  39. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/licenses/LICENSE +0 -0
  40. {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.1.dist-info}/top_level.txt +0 -0
sdg_hub/__init__.py CHANGED
@@ -8,7 +8,6 @@ from .core import (
8
8
  BlockRegistry,
9
9
  Flow,
10
10
  FlowMetadata,
11
- FlowParameter,
12
11
  FlowRegistry,
13
12
  FlowValidator,
14
13
  GenerateError,
@@ -23,7 +22,6 @@ __all__ = [
23
22
  "FlowRegistry",
24
23
  # Metadata and utilities
25
24
  "FlowMetadata",
26
- "FlowParameter",
27
25
  "FlowValidator",
28
26
  "GenerateError",
29
27
  "resolve_path",
sdg_hub/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.3.1'
32
- __version_tuple__ = version_tuple = (0, 3, 1)
31
+ __version__ = version = '0.4.1'
32
+ __version_tuple__ = version_tuple = (0, 4, 1)
33
33
 
34
34
  __commit_id__ = commit_id = None
sdg_hub/core/__init__.py CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  # Local
5
5
  from .blocks import BaseBlock, BlockRegistry
6
- from .flow import Flow, FlowMetadata, FlowParameter, FlowRegistry, FlowValidator
6
+ from .flow import Flow, FlowMetadata, FlowRegistry, FlowValidator
7
7
  from .utils import GenerateError, resolve_path
8
8
 
9
9
  __all__ = [
@@ -14,7 +14,6 @@ __all__ = [
14
14
  "Flow",
15
15
  "FlowRegistry",
16
16
  "FlowMetadata",
17
- "FlowParameter",
18
17
  "FlowValidator",
19
18
  # Utils
20
19
  "GenerateError",
sdg_hub/core/blocks/__init__.py CHANGED
@@ -16,9 +16,8 @@ from .deprecated_blocks import (
16
16
  SelectorBlock,
17
17
  SetToMajorityValue,
18
18
  )
19
- from .evaluation import EvaluateFaithfulnessBlock, EvaluateRelevancyBlock
20
19
  from .filtering import ColumnValueFilterBlock
21
- from .llm import LLMChatBlock, PromptBuilderBlock, TextParserBlock
20
+ from .llm import LLMChatBlock, LLMParserBlock, PromptBuilderBlock, TextParserBlock
22
21
  from .registry import BlockRegistry
23
22
  from .transform import (
24
23
  DuplicateColumnsBlock,
@@ -51,8 +50,7 @@ __all__ = [
51
50
  "SetToMajorityValue", # Deprecated
52
51
  "LLMBlock", # Deprecated
53
52
  "LLMChatBlock",
53
+ "LLMParserBlock",
54
54
  "TextParserBlock",
55
55
  "PromptBuilderBlock",
56
- "EvaluateFaithfulnessBlock",
57
- "EvaluateRelevancyBlock",
58
56
  ]
sdg_hub/core/blocks/base.py CHANGED
@@ -261,18 +261,73 @@ class BaseBlock(BaseModel, ABC):
261
261
  ----------
262
262
  samples : Dataset
263
263
  Input dataset.
264
+ **kwargs : Any
265
+ Runtime parameters to override block configuration
264
266
 
265
267
  Returns
266
268
  -------
267
269
  Dataset
268
270
  Output dataset after block processing.
269
271
  """
270
- self._log_input_data(samples)
271
- self._validate_dataset(samples)
272
- self._validate_custom(samples)
273
- output_dataset = self.generate(samples, **kwargs)
274
- self._log_output_data(samples, output_dataset)
275
- return output_dataset
272
+ # Handle runtime kwargs overrides
273
+ if kwargs:
274
+ # Validate that all kwargs are either valid block fields or flow parameters
275
+ # Skip validation for blocks that accept arbitrary parameters (extra="allow")
276
+ allows_extra = self.model_config.get("extra") == "allow"
277
+ if not allows_extra:
278
+ for key in kwargs:
279
+ if (
280
+ not key.startswith("_flow_")
281
+ and key not in self.__class__.model_fields
282
+ ):
283
+ logger.warning(
284
+ f"Unknown field '{key}' passed to {self.__class__.__name__}. "
285
+ f"This may be a provider-specific parameter or typo. "
286
+ f"Valid fields: {list(self.__class__.model_fields.keys())}"
287
+ )
288
+
289
+ # Only override actual block fields (not flow parameters)
290
+ block_overrides = {
291
+ k: v for k, v in kwargs.items() if k in self.__class__.model_fields
292
+ }
293
+
294
+ # Validate and apply block field overrides if any
295
+ original_values = {}
296
+ if block_overrides:
297
+ # Validate the merged configuration for block fields only
298
+ merged_config = {**self.model_dump(), **block_overrides}
299
+ try:
300
+ self.__class__.model_validate(merged_config)
301
+ except Exception as e:
302
+ raise ValueError(
303
+ f"Invalid runtime override for {self.__class__.__name__}: {e}"
304
+ ) from e
305
+
306
+ # Apply temporary overrides for block fields
307
+ for key, value in block_overrides.items():
308
+ original_values[key] = getattr(self, key)
309
+ setattr(self, key, value)
310
+
311
+ try:
312
+ self._log_input_data(samples)
313
+ self._validate_dataset(samples)
314
+ self._validate_custom(samples)
315
+ # Pass ALL kwargs to generate (including flow params)
316
+ output_dataset = self.generate(samples, **kwargs)
317
+ self._log_output_data(samples, output_dataset)
318
+ return output_dataset
319
+ finally:
320
+ # Always restore original values for block fields
321
+ for key, value in original_values.items():
322
+ setattr(self, key, value)
323
+ else:
324
+ # Normal execution without overrides
325
+ self._log_input_data(samples)
326
+ self._validate_dataset(samples)
327
+ self._validate_custom(samples)
328
+ output_dataset = self.generate(samples)
329
+ self._log_output_data(samples, output_dataset)
330
+ return output_dataset
276
331
 
277
332
  def __repr__(self) -> str:
278
333
  """Compact string representation."""
sdg_hub/core/blocks/filtering/column_value_filter.py CHANGED
@@ -119,7 +119,7 @@ class ColumnValueFilterBlock(BaseBlock):
119
119
  self.output_cols = []
120
120
 
121
121
  # Set derived attributes
122
- self.value = (
122
+ self.filter_value = (
123
123
  self.filter_value
124
124
  if isinstance(self.filter_value, list)
125
125
  else [self.filter_value]
@@ -181,7 +181,8 @@ class ColumnValueFilterBlock(BaseBlock):
181
181
  # Apply filter operation
182
182
  samples = samples.filter(
183
183
  lambda x: any(
184
- self._operation_func(x[self.column_name], value) for value in self.value
184
+ self._operation_func(x[self.column_name], value)
185
+ for value in self.filter_value
185
186
  )
186
187
  )
187
188
 
sdg_hub/core/blocks/llm/__init__.py CHANGED
@@ -7,21 +7,19 @@ local models (vLLM, Ollama), and more.
7
7
  """
8
8
 
9
9
  # Local
10
- from .client_manager import LLMClientManager
11
- from .config import LLMConfig
12
10
  from .error_handler import ErrorCategory, LLMErrorHandler
13
11
  from .llm_chat_block import LLMChatBlock
14
12
  from .llm_chat_with_parsing_retry_block import LLMChatWithParsingRetryBlock
13
+ from .llm_parser_block import LLMParserBlock
15
14
  from .prompt_builder_block import PromptBuilderBlock
16
15
  from .text_parser_block import TextParserBlock
17
16
 
18
17
  __all__ = [
19
- "LLMConfig",
20
- "LLMClientManager",
21
18
  "LLMErrorHandler",
22
19
  "ErrorCategory",
23
20
  "LLMChatBlock",
24
21
  "LLMChatWithParsingRetryBlock",
22
+ "LLMParserBlock",
25
23
  "PromptBuilderBlock",
26
24
  "TextParserBlock",
27
25
  ]