sdg-hub 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +2 -2
- sdg_hub/core/blocks/__init__.py +2 -4
- sdg_hub/core/blocks/base.py +61 -6
- sdg_hub/core/blocks/filtering/column_value_filter.py +3 -2
- sdg_hub/core/blocks/llm/__init__.py +2 -4
- sdg_hub/core/blocks/llm/llm_chat_block.py +251 -265
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +216 -98
- sdg_hub/core/blocks/llm/llm_parser_block.py +320 -0
- sdg_hub/core/blocks/llm/text_parser_block.py +53 -152
- sdg_hub/core/flow/base.py +7 -4
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +51 -11
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +159 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +51 -11
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +14 -2
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +146 -26
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py +0 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml +41 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml +14 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +304 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml +55 -0
- sdg_hub/flows/text_analysis/structured_insights/flow.yaml +28 -4
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/METADATA +1 -1
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/RECORD +29 -25
- sdg_hub/core/blocks/evaluation/__init__.py +0 -9
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +0 -323
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +0 -323
- sdg_hub/core/blocks/evaluation/verify_question_block.py +0 -329
- sdg_hub/core/blocks/llm/client_manager.py +0 -472
- sdg_hub/core/blocks/llm/config.py +0 -337
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/WHEEL +0 -0
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.3.1.dist-info → sdg_hub-0.4.0.dist-info}/top_level.txt +0 -0
sdg_hub/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '0.3.1'
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 1)
|
31
|
+
__version__ = version = '0.4.0'
|
32
|
+
__version_tuple__ = version_tuple = (0, 4, 0)
|
33
33
|
|
34
34
|
__commit_id__ = commit_id = None
|
sdg_hub/core/blocks/__init__.py
CHANGED
@@ -16,9 +16,8 @@ from .deprecated_blocks import (
|
|
16
16
|
SelectorBlock,
|
17
17
|
SetToMajorityValue,
|
18
18
|
)
|
19
|
-
from .evaluation import EvaluateFaithfulnessBlock, EvaluateRelevancyBlock
|
20
19
|
from .filtering import ColumnValueFilterBlock
|
21
|
-
from .llm import LLMChatBlock, PromptBuilderBlock, TextParserBlock
|
20
|
+
from .llm import LLMChatBlock, LLMParserBlock, PromptBuilderBlock, TextParserBlock
|
22
21
|
from .registry import BlockRegistry
|
23
22
|
from .transform import (
|
24
23
|
DuplicateColumnsBlock,
|
@@ -51,8 +50,7 @@ __all__ = [
|
|
51
50
|
"SetToMajorityValue", # Deprecated
|
52
51
|
"LLMBlock", # Deprecated
|
53
52
|
"LLMChatBlock",
|
53
|
+
"LLMParserBlock",
|
54
54
|
"TextParserBlock",
|
55
55
|
"PromptBuilderBlock",
|
56
|
-
"EvaluateFaithfulnessBlock",
|
57
|
-
"EvaluateRelevancyBlock",
|
58
56
|
]
|
sdg_hub/core/blocks/base.py
CHANGED
@@ -261,18 +261,73 @@ class BaseBlock(BaseModel, ABC):
|
|
261
261
|
----------
|
262
262
|
samples : Dataset
|
263
263
|
Input dataset.
|
264
|
+
**kwargs : Any
|
265
|
+
Runtime parameters to override block configuration
|
264
266
|
|
265
267
|
Returns
|
266
268
|
-------
|
267
269
|
Dataset
|
268
270
|
Output dataset after block processing.
|
269
271
|
"""
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
272
|
+
# Handle runtime kwargs overrides
|
273
|
+
if kwargs:
|
274
|
+
# Validate that all kwargs are either valid block fields or flow parameters
|
275
|
+
# Skip validation for blocks that accept arbitrary parameters (extra="allow")
|
276
|
+
allows_extra = self.model_config.get("extra") == "allow"
|
277
|
+
if not allows_extra:
|
278
|
+
for key in kwargs:
|
279
|
+
if (
|
280
|
+
not key.startswith("_flow_")
|
281
|
+
and key not in self.__class__.model_fields
|
282
|
+
):
|
283
|
+
logger.warning(
|
284
|
+
f"Unknown field '{key}' passed to {self.__class__.__name__}. "
|
285
|
+
f"This may be a provider-specific parameter or typo. "
|
286
|
+
f"Valid fields: {list(self.__class__.model_fields.keys())}"
|
287
|
+
)
|
288
|
+
|
289
|
+
# Only override actual block fields (not flow parameters)
|
290
|
+
block_overrides = {
|
291
|
+
k: v for k, v in kwargs.items() if k in self.__class__.model_fields
|
292
|
+
}
|
293
|
+
|
294
|
+
# Validate and apply block field overrides if any
|
295
|
+
original_values = {}
|
296
|
+
if block_overrides:
|
297
|
+
# Validate the merged configuration for block fields only
|
298
|
+
merged_config = {**self.model_dump(), **block_overrides}
|
299
|
+
try:
|
300
|
+
self.__class__.model_validate(merged_config)
|
301
|
+
except Exception as e:
|
302
|
+
raise ValueError(
|
303
|
+
f"Invalid runtime override for {self.__class__.__name__}: {e}"
|
304
|
+
) from e
|
305
|
+
|
306
|
+
# Apply temporary overrides for block fields
|
307
|
+
for key, value in block_overrides.items():
|
308
|
+
original_values[key] = getattr(self, key)
|
309
|
+
setattr(self, key, value)
|
310
|
+
|
311
|
+
try:
|
312
|
+
self._log_input_data(samples)
|
313
|
+
self._validate_dataset(samples)
|
314
|
+
self._validate_custom(samples)
|
315
|
+
# Pass ALL kwargs to generate (including flow params)
|
316
|
+
output_dataset = self.generate(samples, **kwargs)
|
317
|
+
self._log_output_data(samples, output_dataset)
|
318
|
+
return output_dataset
|
319
|
+
finally:
|
320
|
+
# Always restore original values for block fields
|
321
|
+
for key, value in original_values.items():
|
322
|
+
setattr(self, key, value)
|
323
|
+
else:
|
324
|
+
# Normal execution without overrides
|
325
|
+
self._log_input_data(samples)
|
326
|
+
self._validate_dataset(samples)
|
327
|
+
self._validate_custom(samples)
|
328
|
+
output_dataset = self.generate(samples)
|
329
|
+
self._log_output_data(samples, output_dataset)
|
330
|
+
return output_dataset
|
276
331
|
|
277
332
|
def __repr__(self) -> str:
|
278
333
|
"""Compact string representation."""
|
sdg_hub/core/blocks/filtering/column_value_filter.py
CHANGED
@@ -119,7 +119,7 @@ class ColumnValueFilterBlock(BaseBlock):
|
|
119
119
|
self.output_cols = []
|
120
120
|
|
121
121
|
# Set derived attributes
|
122
|
-
self.
|
122
|
+
self.filter_value = (
|
123
123
|
self.filter_value
|
124
124
|
if isinstance(self.filter_value, list)
|
125
125
|
else [self.filter_value]
|
@@ -181,7 +181,8 @@ class ColumnValueFilterBlock(BaseBlock):
|
|
181
181
|
# Apply filter operation
|
182
182
|
samples = samples.filter(
|
183
183
|
lambda x: any(
|
184
|
-
self._operation_func(x[self.column_name], value)
|
184
|
+
self._operation_func(x[self.column_name], value)
|
185
|
+
for value in self.filter_value
|
185
186
|
)
|
186
187
|
)
|
187
188
|
|
sdg_hub/core/blocks/llm/__init__.py
CHANGED
@@ -7,21 +7,19 @@ local models (vLLM, Ollama), and more.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
# Local
|
10
|
-
from .client_manager import LLMClientManager
|
11
|
-
from .config import LLMConfig
|
12
10
|
from .error_handler import ErrorCategory, LLMErrorHandler
|
13
11
|
from .llm_chat_block import LLMChatBlock
|
14
12
|
from .llm_chat_with_parsing_retry_block import LLMChatWithParsingRetryBlock
|
13
|
+
from .llm_parser_block import LLMParserBlock
|
15
14
|
from .prompt_builder_block import PromptBuilderBlock
|
16
15
|
from .text_parser_block import TextParserBlock
|
17
16
|
|
18
17
|
__all__ = [
|
19
|
-
"LLMConfig",
|
20
|
-
"LLMClientManager",
|
21
18
|
"LLMErrorHandler",
|
22
19
|
"ErrorCategory",
|
23
20
|
"LLMChatBlock",
|
24
21
|
"LLMChatWithParsingRetryBlock",
|
22
|
+
"LLMParserBlock",
|
25
23
|
"PromptBuilderBlock",
|
26
24
|
"TextParserBlock",
|
27
25
|
]
|