sdg-hub 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +16 -3
- sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +175 -416
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +174 -415
- sdg_hub/core/blocks/evaluation/verify_question_block.py +180 -415
- sdg_hub/core/blocks/llm/client_manager.py +61 -24
- sdg_hub/core/blocks/llm/config.py +1 -0
- sdg_hub/core/blocks/llm/llm_chat_block.py +62 -7
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +277 -115
- sdg_hub/core/blocks/llm/text_parser_block.py +0 -2
- sdg_hub/core/blocks/registry.py +48 -34
- sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
- sdg_hub/core/flow/base.py +131 -10
- sdg_hub/core/utils/datautils.py +29 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +0 -7
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/METADATA +40 -15
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/RECORD +20 -20
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/WHEEL +0 -0
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/top_level.txt +0 -0
sdg_hub/core/flow/base.py
CHANGED

@@ -13,18 +13,19 @@ from rich.console import Console
 from rich.panel import Panel
 from rich.table import Table
 from rich.tree import Tree
+import datasets
 import yaml
 
 # Local
 from ..blocks.base import BaseBlock
 from ..blocks.registry import BlockRegistry
-from ..utils.datautils import safe_concatenate_with_validation
+from ..utils.datautils import safe_concatenate_with_validation, validate_no_duplicates
 from ..utils.error_handling import EmptyDatasetError, FlowValidationError
 from ..utils.logger_config import setup_logger
 from ..utils.path_resolution import resolve_path
 from ..utils.yaml_utils import save_flow_yaml
 from .checkpointer import FlowCheckpointer
-from .metadata import FlowMetadata, FlowParameter
+from .metadata import DatasetRequirements, FlowMetadata, FlowParameter
 from .migration import FlowMigration
 from .validation import FlowValidator
 
@@ -306,13 +307,11 @@ class Flow(BaseModel):
 
         # Get block class from registry
         try:
-            block_class = BlockRegistry.
+            block_class = BlockRegistry._get(block_type_name)
         except KeyError as exc:
             # Get all available blocks from all categories
-            all_blocks = BlockRegistry.
-            available_blocks = ", ".join(
-                [block for blocks in all_blocks.values() for block in blocks]
-            )
+            all_blocks = BlockRegistry.list_blocks()
+            available_blocks = ", ".join(all_blocks)
             raise FlowValidationError(
                 f"Block type '{block_type_name}' not found in registry. "
                 f"Available blocks: {available_blocks}"
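For reference, a small sketch of the lookup and failure path this hunk now uses, assuming only what the diff itself shows: `BlockRegistry._get()` raising `KeyError` for unknown names and `list_blocks()` returning a flat, joinable collection of names (the block name below is illustrative):

```python
from sdg_hub.core.blocks.registry import BlockRegistry

try:
    block_class = BlockRegistry._get("NoSuchBlock")  # underscore prefix: internal helper
except KeyError:
    # list_blocks() replaces the old category-nested mapping with a flat listing
    print("Available blocks:", ", ".join(BlockRegistry.list_blocks()))
```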
@@ -357,6 +356,7 @@ class Flow(BaseModel):
         runtime_params: Optional[dict[str, dict[str, Any]]] = None,
         checkpoint_dir: Optional[str] = None,
         save_freq: Optional[int] = None,
+        max_concurrency: Optional[int] = None,
     ) -> Dataset:
         """Execute the flow blocks in sequence to generate data.
 

@@ -378,6 +378,9 @@ class Flow(BaseModel):
         save_freq : Optional[int], optional
             Number of completed samples after which to save a checkpoint.
             If None, only saves final results when checkpointing is enabled.
+        max_concurrency : Optional[int], optional
+            Maximum number of concurrent requests across all blocks.
+            Controls async request concurrency to prevent overwhelming servers.
 
         Returns
         -------
@@ -397,6 +400,20 @@ class Flow(BaseModel):
                 f"save_freq must be greater than 0, got {save_freq}"
             )
 
+        # Validate max_concurrency parameter
+        if max_concurrency is not None:
+            # Explicitly reject boolean values (bool is a subclass of int in Python)
+            if isinstance(max_concurrency, bool) or not isinstance(
+                max_concurrency, int
+            ):
+                raise FlowValidationError(
+                    f"max_concurrency must be an int, got {type(max_concurrency).__name__}"
+                )
+            if max_concurrency <= 0:
+                raise FlowValidationError(
+                    f"max_concurrency must be greater than 0, got {max_concurrency}"
+                )
+
         # Validate preconditions
         if not self.blocks:
             raise FlowValidationError("Cannot generate with empty flow")
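From the caller's side, the validation above behaves as follows; a minimal sketch assuming a flow loaded from YAML (the flow path and one-column dataset are illustrative placeholders, and the import path is inferred from this wheel's file list):

```python
from datasets import Dataset
from sdg_hub.core.flow.base import Flow  # path inferred from this diff's file list

flow = Flow.from_yaml("path/to/flow.yaml")           # illustrative path
dataset = Dataset.from_dict({"document": ["text"]})  # illustrative input

# Valid: a positive int caps concurrent LLM requests across all blocks.
result = flow.generate(dataset, max_concurrency=8)

# Each of the following raises FlowValidationError. The bool check runs first
# because bool subclasses int in Python, so True would otherwise pass a plain
# isinstance(..., int) test and silently act as a concurrency limit of 1.
# flow.generate(dataset, max_concurrency=True)
# flow.generate(dataset, max_concurrency=0)
# flow.generate(dataset, max_concurrency="8")
```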
@@ -404,6 +421,8 @@ class Flow(BaseModel):
         if len(dataset) == 0:
             raise EmptyDatasetError("Input dataset is empty")
 
+        validate_no_duplicates(dataset)
+
         # Check if model configuration has been set for flows with LLM blocks
         llm_blocks = self._detect_llm_blocks()
         if llm_blocks and not self._model_config_set:

@@ -420,6 +439,10 @@ class Flow(BaseModel):
                 "Dataset validation failed:\n" + "\n".join(dataset_errors)
             )
 
+        # Log concurrency control if specified
+        if max_concurrency is not None:
+            logger.info(f"Using max_concurrency={max_concurrency} for LLM requests")
+
         # Initialize checkpointer if enabled
         checkpointer = None
         completed_dataset = None

@@ -445,6 +468,7 @@ class Flow(BaseModel):
         logger.info(
             f"Starting flow '{self.metadata.name}' v{self.metadata.version} "
             f"with {len(dataset)} samples across {len(self.blocks)} blocks"
+            + (f" (max_concurrency={max_concurrency})" if max_concurrency else "")
         )
 
         # Merge migrated runtime params with provided ones (provided ones take precedence)
|
|
468
492
|
|
469
493
|
# Execute all blocks on this chunk
|
470
494
|
processed_chunk = self._execute_blocks_on_dataset(
|
471
|
-
chunk_dataset, runtime_params
|
495
|
+
chunk_dataset, runtime_params, max_concurrency
|
472
496
|
)
|
473
497
|
all_processed.append(processed_chunk)
|
474
498
|
|
@@ -492,7 +516,9 @@ class Flow(BaseModel):
|
|
492
516
|
|
493
517
|
else:
|
494
518
|
# Process entire dataset at once
|
495
|
-
final_dataset = self._execute_blocks_on_dataset(
|
519
|
+
final_dataset = self._execute_blocks_on_dataset(
|
520
|
+
dataset, runtime_params, max_concurrency
|
521
|
+
)
|
496
522
|
|
497
523
|
# Save final checkpoint if checkpointing enabled
|
498
524
|
if checkpointer:
|
@@ -515,7 +541,10 @@ class Flow(BaseModel):
|
|
515
541
|
return final_dataset
|
516
542
|
|
517
543
|
def _execute_blocks_on_dataset(
|
518
|
-
self,
|
544
|
+
self,
|
545
|
+
dataset: Dataset,
|
546
|
+
runtime_params: dict[str, dict[str, Any]],
|
547
|
+
max_concurrency: Optional[int] = None,
|
519
548
|
) -> Dataset:
|
520
549
|
"""Execute all blocks in sequence on the given dataset.
|
521
550
|
|
@@ -525,6 +554,8 @@ class Flow(BaseModel):
             Dataset to process through all blocks.
         runtime_params : Dict[str, Dict[str, Any]]
             Runtime parameters for block execution.
+        max_concurrency : Optional[int], optional
+            Maximum concurrency for LLM requests across blocks.
 
         Returns
         -------

@@ -543,6 +574,10 @@ class Flow(BaseModel):
             # Prepare block execution parameters
             block_kwargs = self._prepare_block_kwargs(block, runtime_params)
 
+            # Add max_concurrency to block kwargs if provided
+            if max_concurrency is not None:
+                block_kwargs["_flow_max_concurrency"] = max_concurrency
+
             try:
                 # Check if this is a deprecated block and skip validations
                 is_deprecated_block = (
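Downstream, the reserved `_flow_max_concurrency` key is how the flow-level cap reaches individual blocks; a hedged sketch of how a block could consume it (the `ConcurrencyAwareBlock` class is hypothetical and not part of this diff, which only shows the key being injected):

```python
from typing import Any, Optional


class ConcurrencyAwareBlock:
    """Hypothetical block illustrating consumption of the reserved kwarg."""

    def generate(self, samples: list, **kwargs: Any) -> list:
        # Present only when the caller passed max_concurrency to Flow.generate();
        # popped here so it does not collide with block-specific kwargs.
        max_concurrency: Optional[int] = kwargs.pop("_flow_max_concurrency", None)
        if max_concurrency is not None:
            print(f"Capping concurrent LLM requests at {max_concurrency}")
        return samples
```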
@@ -899,6 +934,8 @@ class Flow(BaseModel):
         if len(dataset) == 0:
             raise EmptyDatasetError("Input dataset is empty")
 
+        validate_no_duplicates(dataset)
+
         # Use smaller sample size if dataset is smaller
         actual_sample_size = min(sample_size, len(dataset))
@@ -1066,6 +1103,90 @@ class Flow(BaseModel):
             "block_names": [block.block_name for block in self.blocks],
         }
 
+    def get_dataset_requirements(self) -> Optional[DatasetRequirements]:
+        """Get the dataset requirements for this flow.
+
+        Returns
+        -------
+        Optional[DatasetRequirements]
+            Dataset requirements object or None if not defined.
+
+        Examples
+        --------
+        >>> flow = Flow.from_yaml("path/to/flow.yaml")
+        >>> requirements = flow.get_dataset_requirements()
+        >>> if requirements:
+        ...     print(f"Required columns: {requirements.required_columns}")
+        """
+        return self.metadata.dataset_requirements
+
+    def get_dataset_schema(self) -> Dataset:
+        """Get an empty dataset with the correct schema for this flow.
+
+        Returns
+        -------
+        Dataset
+            Empty HuggingFace Dataset with the correct schema/features for this flow.
+            Users can add data to this dataset or use it to validate their own dataset schema.
+
+        Examples
+        --------
+        >>> flow = Flow.from_yaml("path/to/flow.yaml")
+        >>> schema_dataset = flow.get_dataset_schema()
+        >>>
+        >>> # Add your data
+        >>> schema_dataset = schema_dataset.add_item({
+        ...     "document": "Your document text",
+        ...     "domain": "Computer Science",
+        ...     "icl_document": "Example document"
+        ... })
+        >>>
+        >>> # Or validate your existing dataset schema
+        >>> my_dataset = Dataset.from_dict(my_data)
+        >>> if my_dataset.features == schema_dataset.features:
+        ...     print("Schema matches!")
+        """
+
+        requirements = self.get_dataset_requirements()
+
+        if requirements is None:
+            # Return empty dataset with no schema requirements
+            return Dataset.from_dict({})
+
+        # Build schema features
+        schema_features = {}
+
+        # Process required columns
+        for col_name in requirements.required_columns:
+            col_type = requirements.column_types.get(col_name, "string")
+            schema_features[col_name] = self._map_column_type_to_feature(col_type)
+
+        # Process optional columns
+        for col_name in requirements.optional_columns:
+            col_type = requirements.column_types.get(col_name, "string")
+            schema_features[col_name] = self._map_column_type_to_feature(col_type)
+
+        # Create empty dataset with the correct features
+        features = datasets.Features(schema_features)
+        empty_data = {col_name: [] for col_name in schema_features.keys()}
+
+        return Dataset.from_dict(empty_data, features=features)
+
+    def _map_column_type_to_feature(self, col_type: str):
+        """Map column type string to HuggingFace feature type."""
+        # Map common type names to HuggingFace types
+        if col_type in ["str", "string", "text"]:
+            return datasets.Value("string")
+        elif col_type in ["int", "integer"]:
+            return datasets.Value("int64")
+        elif col_type in ["float", "number"]:
+            return datasets.Value("float64")
+        elif col_type in ["bool", "boolean"]:
+            return datasets.Value("bool")
+        else:
+            # Default to string for unknown types
+            return datasets.Value("string")
+
     def print_info(self) -> None:
         """
         Print an interactive summary of the Flow in the console.
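Taken together, the two new public methods support a discover-then-validate workflow before calling `generate()`; a minimal sketch, assuming a flow whose YAML declares dataset requirements (the column names are illustrative):

```python
from datasets import Dataset

flow = Flow.from_yaml("path/to/flow.yaml")  # illustrative path

# Discover the expected input schema as an empty, typed dataset.
schema_dataset = flow.get_dataset_schema()
print(schema_dataset.column_names)

# Validate an existing dataset by comparing HuggingFace features.
my_dataset = Dataset.from_dict({"document": ["text"], "domain": ["CS"]})
if my_dataset.features == schema_dataset.features:
    result = flow.generate(my_dataset)
else:
    missing = set(schema_dataset.column_names) - set(my_dataset.column_names)
    print(f"Schema mismatch; missing columns: {missing}")
```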
sdg_hub/core/utils/datautils.py
CHANGED

@@ -15,6 +15,35 @@ def safe_concatenate_datasets(datasets: list):
     return concatenate_datasets(filtered_datasets)
 
 
+def validate_no_duplicates(dataset: Dataset) -> None:
+    """
+    Validate that the input dataset contains only unique rows.
+
+    Uses pandas `.duplicated()` for efficient duplicate detection.
+    Raises FlowValidationError if duplicates are found, including a count
+    of the duplicate rows detected.
+
+    Parameters
+    ----------
+    dataset : Dataset
+        Input dataset to validate.
+
+    Raises
+    ------
+    FlowValidationError
+        If duplicate rows are detected in the dataset.
+    """
+    df = dataset.to_pandas()
+    duplicate_count = int(df.duplicated(keep="first").sum())
+
+    if duplicate_count > 0:
+        raise FlowValidationError(
+            f"Input dataset contains {duplicate_count} duplicate rows. "
+            f"SDG Hub operations require unique input rows. "
+            f"Please deduplicate your dataset before processing."
+        )
+
+
 def safe_concatenate_with_validation(
     datasets: list, context: str = "datasets"
 ) -> Dataset:
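A quick demonstration of the validator's behavior; a sketch assuming the import paths implied by this wheel's layout (`FlowValidationError` lives in `error_handling`, as the base.py imports above show):

```python
from datasets import Dataset

from sdg_hub.core.utils.datautils import validate_no_duplicates
from sdg_hub.core.utils.error_handling import FlowValidationError

# Rows 0 and 1 are identical across every column; duplicated(keep="first")
# marks only the second occurrence, so exactly one duplicate is counted.
ds = Dataset.from_dict({"document": ["a", "a", "b"], "domain": ["x", "x", "y"]})

try:
    validate_no_duplicates(ds)
except FlowValidationError as err:
    print(err)  # Input dataset contains 1 duplicate rows. ...
```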
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml
CHANGED

@@ -46,7 +46,6 @@ blocks:
       input_cols: [document, document_outline]
       output_cols: summary_prompt
       prompt_config_path: detailed_summary.yaml
-      format_as_messages: true
 
   - block_type: LLMChatBlock
     block_config:

@@ -70,7 +69,6 @@ blocks:
       input_cols: [document, document_outline, domain]
       output_cols: atomic_facts_prompt
       prompt_config_path: atomic_facts.yaml
-      format_as_messages: true
 
   - block_type: LLMChatBlock
     block_config:

@@ -94,7 +92,6 @@ blocks:
       input_cols: [document, document_outline]
       output_cols: extractive_summary_prompt
       prompt_config_path: extractive_summary.yaml
-      format_as_messages: true
 
   - block_type: LLMChatBlock
     block_config:

@@ -129,7 +126,6 @@ blocks:
       input_cols: [domain, document, document_outline, icl_document, icl_query_1, icl_response_1, icl_query_2, icl_response_2, icl_query_3, icl_response_3]
       output_cols: knowledge_generation_prompt
       prompt_config_path: generate_questions_responses.yaml
-      format_as_messages: true
 
   - block_type: LLMChatBlock
     block_config:

@@ -157,7 +153,6 @@ blocks:
       filter_value: "YES"
       operation: eq
       async_mode: true
-      format_as_messages: true
       start_tags: ["[Start of Explanation]", "[Start of Answer]"]
       end_tags: ["[End of Explanation]", "[End of Answer]"]

@@ -172,7 +167,6 @@ blocks:
       convert_dtype: float
       max_tokens: 2048
       async_mode: true
-      format_as_messages: true
       start_tags: ["[Start of Feedback]", "[Start of Score]"]
       end_tags: ["[End of Feedback]", "[End of Score]"]

@@ -187,6 +181,5 @@ blocks:
       convert_dtype: float
       max_tokens: 2048
       async_mode: true
-      format_as_messages: true
       start_tags: ["[Start of Explanation]", "[Start of Rating]"]
       end_tags: ["[End of Explanation]", "[End of Rating]"]
{sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sdg_hub
-Version: 0.2.1
+Version: 0.2.2
 Summary: Synthetic Data Generation
 Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
 License: Apache-2.0

@@ -27,7 +27,6 @@ Requires-Dist: datasets<4.0.0,>=2.18.0
 Requires-Dist: httpx<1.0.0,>=0.25.0
 Requires-Dist: jinja2
 Requires-Dist: litellm<1.75.0,>=1.73.0
-Requires-Dist: openai<2.0.0,>=1.13.3
 Requires-Dist: rich
 Requires-Dist: pydantic<3.0.0,>=2.0.0
 Requires-Dist: python-dotenv<2.0.0,>=1.0.0

@@ -92,6 +91,8 @@ A modular Python framework for building synthetic data generation pipelines usin
 
 **📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.
 
+**📋 Dataset Schema Discovery** - Instantly discover required data formats. Get empty datasets with correct schema for easy validation and data preparation.
+
 **🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.
 
 

@@ -176,22 +177,46 @@ flow.set_model_config(
     api_key="your_key",
 )
 ```
-#### 
+#### Discover dataset requirements and create your dataset
 ```python
-# 
-dataset
-
-
-
-
-
-
-'
-'
-'
-'
+# First, discover what data the flow needs
+# Get an empty dataset with the exact schema needed
+schema_dataset = flow.get_dataset_schema()  # Get empty dataset with correct schema
+print(f"Required columns: {schema_dataset.column_names}")
+print(f"Schema: {schema_dataset.features}")
+
+# Option 1: Add data directly to the schema dataset
+dataset = schema_dataset.add_item({
+    'document': 'Your document text here...',
+    'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
+    'domain': 'Computer Science',
+    'icl_document': 'Example document for in-context learning...',
+    'icl_query_1': 'Example question 1?',
+    'icl_response_1': 'Example answer 1',
+    'icl_query_2': 'Example question 2?',
+    'icl_response_2': 'Example answer 2',
+    'icl_query_3': 'Example question 3?',
+    'icl_response_3': 'Example answer 3'
 })
 
+# Option 2: Create your own dataset and validate the schema
+my_dataset = Dataset.from_dict(my_data_dict)
+if my_dataset.features == schema_dataset.features:
+    print("✅ Schema matches - ready to generate!")
+    dataset = my_dataset
+else:
+    print("❌ Schema mismatch - check your columns")
+
+# Option 3: Get raw requirements for detailed inspection
+requirements = flow.get_dataset_requirements()
+if requirements:
+    print(f"Required: {requirements.required_columns}")
+    print(f"Optional: {requirements.optional_columns}")
+    print(f"Min samples: {requirements.min_samples}")
+```
+
+#### Dry Run and Generate
+```python
 # Quick Testing with Dry Run
 dry_result = flow.dry_run(dataset, sample_size=1)
 print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
{sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/RECORD
CHANGED

@@ -1,10 +1,10 @@
 sdg_hub/__init__.py,sha256=Tw-6R5a8_W1kJcTAsW3R9ltBDP1dy5-fe7Tvt3cSyCQ,550
-sdg_hub/_version.py,sha256=
+sdg_hub/_version.py,sha256=o3ZTescp-19Z9cvBGq9dQnbppljgzdUYUf98Nov0spY,704
 sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sdg_hub/core/__init__.py,sha256=NwqB4fwhC29W50VW7QXZssLxx122YvgO9LHDLdgAnrI,496
 sdg_hub/core/blocks/__init__.py,sha256=9sCkCvDQzJGSedaePVlEIpbNwrkBz_K500VW_6FLhuE,1601
 sdg_hub/core/blocks/base.py,sha256=TrzUAkG7Tiquk0Z3SOFsb5mRnHd1IbHH6gFPVH1P7T8,10424
-sdg_hub/core/blocks/registry.py,sha256=
+sdg_hub/core/blocks/registry.py,sha256=FuEN_pnq-nSH1LguY3_oCubT6Kz3SuJjk3TcUpLT-lw,10695
 sdg_hub/core/blocks/deprecated_blocks/__init__.py,sha256=RDu3MWFStDQko-TKkx8tGoB1UTatP_RSldZK43zHDvY,889
 sdg_hub/core/blocks/deprecated_blocks/combine_columns.py,sha256=HCvpaYsAwgx1Dm0vIshcWsKoVsRT0KrmKp9j4oqtByc,2757
 sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py,sha256=maCaaEs0EMMzt7L1xm7fAH3ylaFMHEkeC_dtOw3FrjU,2694

@@ -13,38 +13,38 @@ sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py,sha256=IenCskrPEv09h2uT
 sdg_hub/core/blocks/deprecated_blocks/llmblock.py,sha256=34lzC43BODpMk5AwlWA1ctdYPmN7cA6WL5vMXaI0P0Y,20385
 sdg_hub/core/blocks/deprecated_blocks/rename_columns.py,sha256=thp-mHtkRmUw_nYKpldy_mLWR2AvC5YUhbqDETM6-T0,2620
 sdg_hub/core/blocks/deprecated_blocks/sample_populator.py,sha256=UdueMApxOmPWaxxMrw7b1v74fKJBfqqRATEBqgmVtNw,1737
-sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=
+sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=nWecsVsW8DvBcqAF_LOqXmW-5MQ28uN3d1y6wkSy38c,2960
 sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py,sha256=44TQu-rK5isia-otMVB1zHd8D-wWmu3C8CI1NLtfY5s,2729
 sdg_hub/core/blocks/evaluation/__init__.py,sha256=kFXee-vsVVdU2XtLio9qHgPx_a0zoB_rQr509EKBGJc,357
-sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=
-sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=
-sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=
+sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=vFi3YIxVPNnzgdenIeAl7yUb4OOUY_uUOXS-pWLsDmw,12223
+sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=NXT1lixR-JnOXNlBCbMjULcpu4kh2SthhwCWEobiBt0,12115
+sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=LKoIHdxUuTVO24n_M9cAliEj56uEe2kQAecKTRz65zI,12465
 sdg_hub/core/blocks/filtering/__init__.py,sha256=isxSVSvDqkMjG8dQSl3Q2M4g5c1t9fTjBSA21icf-yA,275
 sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=H8Gif0q9Wc_d1TnVow8Zpsg7blJOFGN1EZmV6OPpkcg,5971
 sdg_hub/core/blocks/llm/__init__.py,sha256=N6-Prgd4X85oWbMQzhYMrq7OX-NTJm57cghowK-val0,844
-sdg_hub/core/blocks/llm/client_manager.py,sha256=
-sdg_hub/core/blocks/llm/config.py,sha256=
+sdg_hub/core/blocks/llm/client_manager.py,sha256=PDf07t2s68WQaoU-LTRke6nQUDWDKwiV4ptAT7Cbn18,14047
+sdg_hub/core/blocks/llm/config.py,sha256=gc4xp5D20MSlKMFEos0QAaKUwgbZpBtMGXmn6LsIk78,11289
 sdg_hub/core/blocks/llm/error_handler.py,sha256=7T-019ZFB9qgZoX1ybIiXyaLjPzrF96qcKmUu6vmO6g,12178
-sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=
-sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=
+sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=J-iDJTY-txuduFXx-NUhyohWpRnEaYIg4f0VsSgpjVw,22641
+sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=H7LqYpEiFO1g2cxncAl4vkLhQxAYgGpV6cUyQTSG03k,27628
 sdg_hub/core/blocks/llm/prompt_builder_block.py,sha256=fkJd718X1oYlMY1cjo_8WCO16Gl8Tm0bUPWR78E_uws,13935
-sdg_hub/core/blocks/llm/text_parser_block.py,sha256=
+sdg_hub/core/blocks/llm/text_parser_block.py,sha256=uYcof6cy1tgMG_mHpFBW4xhVXg40rNhTerM5EqAvcEo,14187
 sdg_hub/core/blocks/transform/__init__.py,sha256=Y_3izPCtgnMbFK-gBMeLHZspSrNLgbGheAJXU57XfFw,746
 sdg_hub/core/blocks/transform/duplicate_columns.py,sha256=SaP7rIF4ZFEFFa50aU2xGNIuddXaEZrKxdWfHjzFpVI,2833
-sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=
+sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=XC_a7Skbd3mu7f4ra8fGWPxMwqUMSjJkQ7Ag7vflwJA,8235
 sdg_hub/core/blocks/transform/melt_columns.py,sha256=vaYa5Taq6GhNZYWFL4uPK3-SfN2BsKEm-wvjd2EYYoI,4382
 sdg_hub/core/blocks/transform/rename_columns.py,sha256=qeB5L2utqDQnutUetH1VKZSqDiJSH_yUp5EFCV-XCVI,1998
 sdg_hub/core/blocks/transform/text_concat.py,sha256=_-B__Hob1WwgwkILPIZvTnsDzuwtoX1hKviyzHlnnes,3149
 sdg_hub/core/blocks/transform/uniform_col_val_setter.py,sha256=XnjiT29z3PzIPy8M-mmE2w-Miab6Ed5ahy32SaxTCTE,3263
 sdg_hub/core/flow/__init__.py,sha256=N2NZGngvd7qpT5FI_knKukUFM0IkD9K5jdTi-gDeUI4,475
-sdg_hub/core/flow/base.py,sha256=
+sdg_hub/core/flow/base.py,sha256=eneLS9GR21q9nK3M8qZzIyJ-OeFF2Lp6ZwzQjBVnbyk,50364
 sdg_hub/core/flow/checkpointer.py,sha256=stm5ZtjjEiLk9ZkAAnoQQn5Y8Yl_d7qCsQLZTrCXR48,11867
 sdg_hub/core/flow/metadata.py,sha256=h9jpvAzWsF5n4ztZMzwa9ZNgnzKTHmFWdn7YbyJLHCw,12977
 sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d4,7558
 sdg_hub/core/flow/registry.py,sha256=DzCqEEgwhvwnCBAGLogoMVdwXh4pCHrxOWqoxam7O8I,12162
 sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
 sdg_hub/core/utils/__init__.py,sha256=C2FzLn3dHprwGJDEgI4fyFS3aoCJR-9PhHsunxropJ8,351
-sdg_hub/core/utils/datautils.py,sha256=
+sdg_hub/core/utils/datautils.py,sha256=vvZSNZ94vMQMh9Bs99X92UPwSNzyyYwO3V4w3O3QYoA,2801
 sdg_hub/core/utils/error_handling.py,sha256=yku8cGj_nKCyXDsnb-mHCpgukkkAMucJ4iAUrIzqysc,5510
 sdg_hub/core/utils/flow_id_words.yaml,sha256=5QHpQdP7zwahRuooyAlJIwBY7WcDR7vtbJXxVJqujbg,2317
 sdg_hub/core/utils/flow_identifier.py,sha256=aAHfK_G9AwEtMglLRMdMpi_AI1dciub5UqBGm4yb2HE,2841

@@ -59,10 +59,10 @@ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/ev
 sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml,sha256=zwzklXup6khRkR88avgrJTcjaMcV1wnbeYaML5oPuNs,1767
 sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml,sha256=cA8igo7jMrRXaWW6k0of6KOp7YnxLtPj0fP4DbrmZNQ,3647
 sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml,sha256=fcMV7LaCFZo4D29nwhGJXqFFuZMYVLo9XYjv8zcU6zs,364
-sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=
+sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=oyrLRjEnmioMa_G_sd9yQK_nBt4arwWV5fvKgzYE2ds,6090
 sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml,sha256=yX8aLY8dJSDML9ZJhnj9RzPbN8tH2xfcM4Gc6xZuwqQ,2596
-sdg_hub-0.2.
-sdg_hub-0.2.
-sdg_hub-0.2.
-sdg_hub-0.2.
-sdg_hub-0.2.
+sdg_hub-0.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sdg_hub-0.2.2.dist-info/METADATA,sha256=OHIiUh4AqSHVW-asGQdUp67TXL_dCRV7NVC0E14IwM0,9647
+sdg_hub-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sdg_hub-0.2.2.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
+sdg_hub-0.2.2.dist-info/RECORD,,
{sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/WHEEL
File without changes

{sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/licenses/LICENSE
File without changes

{sdg_hub-0.2.1.dist-info → sdg_hub-0.2.2.dist-info}/top_level.txt
File without changes