sdg-hub 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
sdg_hub/core/flow/base.py CHANGED
@@ -13,18 +13,19 @@ from rich.console import Console
 from rich.panel import Panel
 from rich.table import Table
 from rich.tree import Tree
+import datasets
 import yaml

 # Local
 from ..blocks.base import BaseBlock
 from ..blocks.registry import BlockRegistry
-from ..utils.datautils import safe_concatenate_with_validation
+from ..utils.datautils import safe_concatenate_with_validation, validate_no_duplicates
 from ..utils.error_handling import EmptyDatasetError, FlowValidationError
 from ..utils.logger_config import setup_logger
 from ..utils.path_resolution import resolve_path
 from ..utils.yaml_utils import save_flow_yaml
 from .checkpointer import FlowCheckpointer
-from .metadata import FlowMetadata, FlowParameter
+from .metadata import DatasetRequirements, FlowMetadata, FlowParameter
 from .migration import FlowMigration
 from .validation import FlowValidator

@@ -306,13 +307,11 @@ class Flow(BaseModel):

             # Get block class from registry
             try:
-                block_class = BlockRegistry.get(block_type_name)
+                block_class = BlockRegistry._get(block_type_name)
             except KeyError as exc:
                 # Get all available blocks from all categories
-                all_blocks = BlockRegistry.all()
-                available_blocks = ", ".join(
-                    [block for blocks in all_blocks.values() for block in blocks]
-                )
+                all_blocks = BlockRegistry.list_blocks()
+                available_blocks = ", ".join(all_blocks)
                 raise FlowValidationError(
                     f"Block type '{block_type_name}' not found in registry. "
                     f"Available blocks: {available_blocks}"
@@ -357,6 +356,7 @@
         runtime_params: Optional[dict[str, dict[str, Any]]] = None,
         checkpoint_dir: Optional[str] = None,
         save_freq: Optional[int] = None,
+        max_concurrency: Optional[int] = None,
     ) -> Dataset:
         """Execute the flow blocks in sequence to generate data.

@@ -378,6 +378,9 @@
         save_freq : Optional[int], optional
             Number of completed samples after which to save a checkpoint.
             If None, only saves final results when checkpointing is enabled.
+        max_concurrency : Optional[int], optional
+            Maximum number of concurrent requests across all blocks.
+            Controls async request concurrency to prevent overwhelming servers.

         Returns
         -------
@@ -397,6 +400,20 @@
                 f"save_freq must be greater than 0, got {save_freq}"
             )

+        # Validate max_concurrency parameter
+        if max_concurrency is not None:
+            # Explicitly reject boolean values (bool is a subclass of int in Python)
+            if isinstance(max_concurrency, bool) or not isinstance(
+                max_concurrency, int
+            ):
+                raise FlowValidationError(
+                    f"max_concurrency must be an int, got {type(max_concurrency).__name__}"
+                )
+            if max_concurrency <= 0:
+                raise FlowValidationError(
+                    f"max_concurrency must be greater than 0, got {max_concurrency}"
+                )
+
         # Validate preconditions
         if not self.blocks:
             raise FlowValidationError("Cannot generate with empty flow")
@@ -404,6 +421,8 @@
         if len(dataset) == 0:
             raise EmptyDatasetError("Input dataset is empty")

+        validate_no_duplicates(dataset)
+
         # Check if model configuration has been set for flows with LLM blocks
         llm_blocks = self._detect_llm_blocks()
         if llm_blocks and not self._model_config_set:
@@ -420,6 +439,10 @@
                 "Dataset validation failed:\n" + "\n".join(dataset_errors)
             )

+        # Log concurrency control if specified
+        if max_concurrency is not None:
+            logger.info(f"Using max_concurrency={max_concurrency} for LLM requests")
+
         # Initialize checkpointer if enabled
         checkpointer = None
         completed_dataset = None
@@ -445,6 +468,7 @@
         logger.info(
             f"Starting flow '{self.metadata.name}' v{self.metadata.version} "
             f"with {len(dataset)} samples across {len(self.blocks)} blocks"
+            + (f" (max_concurrency={max_concurrency})" if max_concurrency else "")
         )

         # Merge migrated runtime params with provided ones (provided ones take precedence)
@@ -468,7 +492,7 @@

             # Execute all blocks on this chunk
             processed_chunk = self._execute_blocks_on_dataset(
-                chunk_dataset, runtime_params
+                chunk_dataset, runtime_params, max_concurrency
             )
             all_processed.append(processed_chunk)

@@ -492,7 +516,9 @@

         else:
             # Process entire dataset at once
-            final_dataset = self._execute_blocks_on_dataset(dataset, runtime_params)
+            final_dataset = self._execute_blocks_on_dataset(
+                dataset, runtime_params, max_concurrency
+            )

             # Save final checkpoint if checkpointing enabled
             if checkpointer:
@@ -515,7 +541,10 @@
         return final_dataset

     def _execute_blocks_on_dataset(
-        self, dataset: Dataset, runtime_params: dict[str, dict[str, Any]]
+        self,
+        dataset: Dataset,
+        runtime_params: dict[str, dict[str, Any]],
+        max_concurrency: Optional[int] = None,
     ) -> Dataset:
         """Execute all blocks in sequence on the given dataset.

@@ -525,6 +554,8 @@
             Dataset to process through all blocks.
         runtime_params : Dict[str, Dict[str, Any]]
             Runtime parameters for block execution.
+        max_concurrency : Optional[int], optional
+            Maximum concurrency for LLM requests across blocks.

         Returns
         -------
@@ -543,6 +574,10 @@
             # Prepare block execution parameters
             block_kwargs = self._prepare_block_kwargs(block, runtime_params)

+            # Add max_concurrency to block kwargs if provided
+            if max_concurrency is not None:
+                block_kwargs["_flow_max_concurrency"] = max_concurrency
+
             try:
                 # Check if this is a deprecated block and skip validations
                 is_deprecated_block = (
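The flow side only injects the reserved `_flow_max_concurrency` kwarg; this diff does not show how blocks consume it. Illustratively, a block could honor the cap with an `asyncio.Semaphore`, roughly as below (a sketch, not the package's actual implementation; `send_one` is a hypothetical per-request coroutine):

```python
import asyncio

async def run_requests(prompts, send_one, _flow_max_concurrency=None, **kwargs):
    # No cap means effectively unbounded (bounded only by the number of prompts).
    sem = asyncio.Semaphore(_flow_max_concurrency or len(prompts))

    async def bounded(prompt):
        async with sem:  # at most N requests in flight at once
            return await send_one(prompt)

    return await asyncio.gather(*(bounded(p) for p in prompts))
```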
@@ -899,6 +934,8 @@
         if len(dataset) == 0:
             raise EmptyDatasetError("Input dataset is empty")

+        validate_no_duplicates(dataset)
+
         # Use smaller sample size if dataset is smaller
         actual_sample_size = min(sample_size, len(dataset))

@@ -1066,6 +1103,90 @@
             "block_names": [block.block_name for block in self.blocks],
         }

+    def get_dataset_requirements(self) -> Optional[DatasetRequirements]:
+        """Get the dataset requirements for this flow.
+
+        Returns
+        -------
+        Optional[DatasetRequirements]
+            Dataset requirements object or None if not defined.
+
+        Examples
+        --------
+        >>> flow = Flow.from_yaml("path/to/flow.yaml")
+        >>> requirements = flow.get_dataset_requirements()
+        >>> if requirements:
+        ...     print(f"Required columns: {requirements.required_columns}")
+        """
+        return self.metadata.dataset_requirements
+
+    def get_dataset_schema(self) -> Dataset:
+        """Get an empty dataset with the correct schema for this flow.
+
+        Returns
+        -------
+        Dataset
+            Empty HuggingFace Dataset with the correct schema/features for this flow.
+            Users can add data to this dataset or use it to validate their own dataset schema.
+
+        Examples
+        --------
+        >>> flow = Flow.from_yaml("path/to/flow.yaml")
+        >>> schema_dataset = flow.get_dataset_schema()
+        >>>
+        >>> # Add your data
+        >>> schema_dataset = schema_dataset.add_item({
+        ...     "document": "Your document text",
+        ...     "domain": "Computer Science",
+        ...     "icl_document": "Example document"
+        ... })
+        >>>
+        >>> # Or validate your existing dataset schema
+        >>> my_dataset = Dataset.from_dict(my_data)
+        >>> if my_dataset.features == schema_dataset.features:
+        ...     print("Schema matches!")
+        """
+
+        requirements = self.get_dataset_requirements()
+
+        if requirements is None:
+            # Return empty dataset with no schema requirements
+            return Dataset.from_dict({})
+
+        # Build schema features
+        schema_features = {}
+
+        # Process required columns
+        for col_name in requirements.required_columns:
+            col_type = requirements.column_types.get(col_name, "string")
+            schema_features[col_name] = self._map_column_type_to_feature(col_type)
+
+        # Process optional columns
+        for col_name in requirements.optional_columns:
+            col_type = requirements.column_types.get(col_name, "string")
+            schema_features[col_name] = self._map_column_type_to_feature(col_type)
+
+        # Create empty dataset with the correct features
+        features = datasets.Features(schema_features)
+        empty_data = {col_name: [] for col_name in schema_features.keys()}
+
+        return Dataset.from_dict(empty_data, features=features)
+
+    def _map_column_type_to_feature(self, col_type: str):
+        """Map column type string to HuggingFace feature type."""
+        # Map common type names to HuggingFace types
+        if col_type in ["str", "string", "text"]:
+            return datasets.Value("string")
+        elif col_type in ["int", "integer"]:
+            return datasets.Value("int64")
+        elif col_type in ["float", "number"]:
+            return datasets.Value("float64")
+        elif col_type in ["bool", "boolean"]:
+            return datasets.Value("bool")
+        else:
+            # Default to string for unknown types
+            return datasets.Value("string")
+
     def print_info(self) -> None:
         """
         Print an interactive summary of the Flow in the console.
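`_map_column_type_to_feature` always produces a feature, falling back to `string` for unrecognized type names, so `get_dataset_schema()` never fails on an unexpected declaration. A small sketch of the resulting round trip, with hypothetical column names and declared types (the mapping comments mirror the method above):

```python
from datasets import Dataset, Features, Value

# Mapping per _map_column_type_to_feature: str/string/text -> string,
# int/integer -> int64, float/number -> float64, bool/boolean -> bool,
# anything else -> string.
features = Features({
    "document": Value("string"),      # declared "text"
    "num_questions": Value("int64"),  # declared "int"
    "score": Value("float64"),        # declared "number"
})
empty = Dataset.from_dict({name: [] for name in features}, features=features)

row = empty.add_item({"document": "text", "num_questions": 3, "score": 0.9})
print(row.features == features)  # True: add_item preserves the schema
```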
sdg_hub/core/utils/datautils.py CHANGED
@@ -15,6 +15,35 @@ def safe_concatenate_datasets(datasets: list):
     return concatenate_datasets(filtered_datasets)


+def validate_no_duplicates(dataset: Dataset) -> None:
+    """
+    Validate that the input dataset contains only unique rows.
+
+    Uses pandas `.duplicated()` for efficient duplicate detection.
+    Raises FlowValidationError if duplicates are found, including a count
+    of the duplicate rows detected.
+
+    Parameters
+    ----------
+    dataset : Dataset
+        Input dataset to validate.
+
+    Raises
+    ------
+    FlowValidationError
+        If duplicate rows are detected in the dataset.
+    """
+    df = dataset.to_pandas()
+    duplicate_count = int(df.duplicated(keep="first").sum())
+
+    if duplicate_count > 0:
+        raise FlowValidationError(
+            f"Input dataset contains {duplicate_count} duplicate rows. "
+            f"SDG Hub operations require unique input rows. "
+            f"Please deduplicate your dataset before processing."
+        )
+
+
 def safe_concatenate_with_validation(
     datasets: list, context: str = "datasets"
 ) -> Dataset:
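Because `generate()` and `dry_run()` now call `validate_no_duplicates()` before running any blocks, datasets with repeated rows fail fast. A sketch of checking and deduplicating ahead of time, using the same pandas round trip the validator itself uses:

```python
from datasets import Dataset

dataset = Dataset.from_dict({"document": ["a", "a", "b"]})

# Same detection the validator performs:
df = dataset.to_pandas()
print(int(df.duplicated(keep="first").sum()))  # 1 duplicate row

# Deduplicate before calling flow.generate(...):
deduped = Dataset.from_pandas(df.drop_duplicates(), preserve_index=False)
print(len(deduped))  # 2
```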
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml CHANGED
@@ -46,7 +46,6 @@ blocks:
       input_cols: [document, document_outline]
       output_cols: summary_prompt
       prompt_config_path: detailed_summary.yaml
-      format_as_messages: true

   - block_type: LLMChatBlock
     block_config:
@@ -70,7 +69,6 @@ blocks:
       input_cols: [document, document_outline, domain]
       output_cols: atomic_facts_prompt
       prompt_config_path: atomic_facts.yaml
-      format_as_messages: true

   - block_type: LLMChatBlock
     block_config:
@@ -94,7 +92,6 @@ blocks:
       input_cols: [document, document_outline]
       output_cols: extractive_summary_prompt
       prompt_config_path: extractive_summary.yaml
-      format_as_messages: true

   - block_type: LLMChatBlock
     block_config:
@@ -129,7 +126,6 @@ blocks:
       input_cols: [domain, document, document_outline, icl_document, icl_query_1, icl_response_1, icl_query_2, icl_response_2, icl_query_3, icl_response_3]
       output_cols: knowledge_generation_prompt
       prompt_config_path: generate_questions_responses.yaml
-      format_as_messages: true

   - block_type: LLMChatBlock
     block_config:
@@ -157,7 +153,6 @@
       filter_value: "YES"
       operation: eq
       async_mode: true
-      format_as_messages: true
      start_tags: ["[Start of Explanation]", "[Start of Answer]"]
       end_tags: ["[End of Explanation]", "[End of Answer]"]

@@ -172,7 +167,6 @@
       convert_dtype: float
       max_tokens: 2048
       async_mode: true
-      format_as_messages: true
       start_tags: ["[Start of Feedback]", "[Start of Score]"]
       end_tags: ["[End of Feedback]", "[End of Score]"]

@@ -187,6 +181,5 @@
       convert_dtype: float
       max_tokens: 2048
       async_mode: true
-      format_as_messages: true
       start_tags: ["[Start of Explanation]", "[Start of Rating]"]
       end_tags: ["[End of Explanation]", "[End of Rating]"]
sdg_hub-0.2.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sdg_hub
-Version: 0.2.1
+Version: 0.2.2
 Summary: Synthetic Data Generation
 Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
 License: Apache-2.0
@@ -27,7 +27,6 @@ Requires-Dist: datasets<4.0.0,>=2.18.0
 Requires-Dist: httpx<1.0.0,>=0.25.0
 Requires-Dist: jinja2
 Requires-Dist: litellm<1.75.0,>=1.73.0
-Requires-Dist: openai<2.0.0,>=1.13.3
 Requires-Dist: rich
 Requires-Dist: pydantic<3.0.0,>=2.0.0
 Requires-Dist: python-dotenv<2.0.0,>=1.0.0
@@ -92,6 +91,8 @@ A modular Python framework for building synthetic data generation pipelines usin

 **📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.

+**📋 Dataset Schema Discovery** - Instantly discover required data formats. Get empty datasets with correct schema for easy validation and data preparation.
+
 **🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.


@@ -176,22 +177,46 @@ flow.set_model_config(
     api_key="your_key",
 )
 ```
-#### Load your dataset and run the flow
+#### Discover dataset requirements and create your dataset
 ```python
-# Create your dataset with required columns
-dataset = Dataset.from_dict({
-    'document': ['Your document text here...'],
-    'document_outline': ['1. Topic A; 2. Topic B; 3. Topic C'],
-    'domain': ['Computer Science'],
-    'icl_document': ['Example document for in-context learning...'],
-    'icl_query_1': ['Example question 1?'],
-    'icl_response_1': ['Example answer 1'],
-    'icl_query_2': ['Example question 2?'],
-    'icl_response_2': ['Example answer 2'],
-    'icl_query_3': ['Example question 3?'],
-    'icl_response_3': ['Example answer 3']
+# First, discover what data the flow needs
+# Get an empty dataset with the exact schema needed
+schema_dataset = flow.get_dataset_schema()  # Get empty dataset with correct schema
+print(f"Required columns: {schema_dataset.column_names}")
+print(f"Schema: {schema_dataset.features}")
+
+# Option 1: Add data directly to the schema dataset
+dataset = schema_dataset.add_item({
+    'document': 'Your document text here...',
+    'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
+    'domain': 'Computer Science',
+    'icl_document': 'Example document for in-context learning...',
+    'icl_query_1': 'Example question 1?',
+    'icl_response_1': 'Example answer 1',
+    'icl_query_2': 'Example question 2?',
+    'icl_response_2': 'Example answer 2',
+    'icl_query_3': 'Example question 3?',
+    'icl_response_3': 'Example answer 3'
 })

+# Option 2: Create your own dataset and validate the schema
+my_dataset = Dataset.from_dict(my_data_dict)
+if my_dataset.features == schema_dataset.features:
+    print("✅ Schema matches - ready to generate!")
+    dataset = my_dataset
+else:
+    print("❌ Schema mismatch - check your columns")
+
+# Option 3: Get raw requirements for detailed inspection
+requirements = flow.get_dataset_requirements()
+if requirements:
+    print(f"Required: {requirements.required_columns}")
+    print(f"Optional: {requirements.optional_columns}")
+    print(f"Min samples: {requirements.min_samples}")
+```
+
+#### Dry Run and Generate
+```python
 # Quick Testing with Dry Run
 dry_result = flow.dry_run(dataset, sample_size=1)
 print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
sdg_hub-0.2.2.dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
 sdg_hub/__init__.py,sha256=Tw-6R5a8_W1kJcTAsW3R9ltBDP1dy5-fe7Tvt3cSyCQ,550
-sdg_hub/_version.py,sha256=UoNvMtd4wCG76RwoSpNCUtaFyTwakGcZolfjXzNVSMY,511
+sdg_hub/_version.py,sha256=o3ZTescp-19Z9cvBGq9dQnbppljgzdUYUf98Nov0spY,704
 sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sdg_hub/core/__init__.py,sha256=NwqB4fwhC29W50VW7QXZssLxx122YvgO9LHDLdgAnrI,496
 sdg_hub/core/blocks/__init__.py,sha256=9sCkCvDQzJGSedaePVlEIpbNwrkBz_K500VW_6FLhuE,1601
 sdg_hub/core/blocks/base.py,sha256=TrzUAkG7Tiquk0Z3SOFsb5mRnHd1IbHH6gFPVH1P7T8,10424
-sdg_hub/core/blocks/registry.py,sha256=U__75QrxFpRaJlt36mOd26dgOqBeePs-ZX0Rnutp6r0,9782
+sdg_hub/core/blocks/registry.py,sha256=FuEN_pnq-nSH1LguY3_oCubT6Kz3SuJjk3TcUpLT-lw,10695
 sdg_hub/core/blocks/deprecated_blocks/__init__.py,sha256=RDu3MWFStDQko-TKkx8tGoB1UTatP_RSldZK43zHDvY,889
 sdg_hub/core/blocks/deprecated_blocks/combine_columns.py,sha256=HCvpaYsAwgx1Dm0vIshcWsKoVsRT0KrmKp9j4oqtByc,2757
 sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py,sha256=maCaaEs0EMMzt7L1xm7fAH3ylaFMHEkeC_dtOw3FrjU,2694
@@ -13,38 +13,38 @@ sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py,sha256=IenCskrPEv09h2uT
 sdg_hub/core/blocks/deprecated_blocks/llmblock.py,sha256=34lzC43BODpMk5AwlWA1ctdYPmN7cA6WL5vMXaI0P0Y,20385
 sdg_hub/core/blocks/deprecated_blocks/rename_columns.py,sha256=thp-mHtkRmUw_nYKpldy_mLWR2AvC5YUhbqDETM6-T0,2620
 sdg_hub/core/blocks/deprecated_blocks/sample_populator.py,sha256=UdueMApxOmPWaxxMrw7b1v74fKJBfqqRATEBqgmVtNw,1737
-sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=ABcXZrqEMsgKfdGAkSo2plMp4LsZSqPhEQugoDEYm1I,2950
+sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=nWecsVsW8DvBcqAF_LOqXmW-5MQ28uN3d1y6wkSy38c,2960
 sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py,sha256=44TQu-rK5isia-otMVB1zHd8D-wWmu3C8CI1NLtfY5s,2729
 sdg_hub/core/blocks/evaluation/__init__.py,sha256=kFXee-vsVVdU2XtLio9qHgPx_a0zoB_rQr509EKBGJc,357
-sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=ZuQ8jq2JwTdslUJtFi1E9NXebCWFZS8isXOafcJ_CMU,23026
-sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=ieQRwl4bx5EQ3m7Wa2P3pHLUPQY7HuwNWjHUCo98u6g,22832
-sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=fSNbW1KpdfVE0fQsm4Y8QfVk6A3J5H3C0dtGn49t8tM,22853
+sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=vFi3YIxVPNnzgdenIeAl7yUb4OOUY_uUOXS-pWLsDmw,12223
+sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=NXT1lixR-JnOXNlBCbMjULcpu4kh2SthhwCWEobiBt0,12115
+sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=LKoIHdxUuTVO24n_M9cAliEj56uEe2kQAecKTRz65zI,12465
 sdg_hub/core/blocks/filtering/__init__.py,sha256=isxSVSvDqkMjG8dQSl3Q2M4g5c1t9fTjBSA21icf-yA,275
 sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=H8Gif0q9Wc_d1TnVow8Zpsg7blJOFGN1EZmV6OPpkcg,5971
 sdg_hub/core/blocks/llm/__init__.py,sha256=N6-Prgd4X85oWbMQzhYMrq7OX-NTJm57cghowK-val0,844
-sdg_hub/core/blocks/llm/client_manager.py,sha256=vaoPoTITJ9IlooeVRfu6M4WBc08mp4aJZ5tvnl2fMv8,12309
-sdg_hub/core/blocks/llm/config.py,sha256=TmbfqxPHH3mShTK2EuCX2AGKtDvl0aSvihsaqgzABtM,11266
+sdg_hub/core/blocks/llm/client_manager.py,sha256=PDf07t2s68WQaoU-LTRke6nQUDWDKwiV4ptAT7Cbn18,14047
+sdg_hub/core/blocks/llm/config.py,sha256=gc4xp5D20MSlKMFEos0QAaKUwgbZpBtMGXmn6LsIk78,11289
 sdg_hub/core/blocks/llm/error_handler.py,sha256=7T-019ZFB9qgZoX1ybIiXyaLjPzrF96qcKmUu6vmO6g,12178
-sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=3o2oV_ecWsEHFp5FWPIpBT-yJ1imJmeZy2b9GZL-T54,20121
-sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=mMmifTC-sRUhdxuLRRtAMhQC7r7NOyTAfBx-xTzLzTc,19669
+sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=J-iDJTY-txuduFXx-NUhyohWpRnEaYIg4f0VsSgpjVw,22641
+sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=H7LqYpEiFO1g2cxncAl4vkLhQxAYgGpV6cUyQTSG03k,27628
 sdg_hub/core/blocks/llm/prompt_builder_block.py,sha256=fkJd718X1oYlMY1cjo_8WCO16Gl8Tm0bUPWR78E_uws,13935
-sdg_hub/core/blocks/llm/text_parser_block.py,sha256=vQgUaeYJI9HuxDPRjII-NIOsR01JA-sBBGl05623L8I,14391
+sdg_hub/core/blocks/llm/text_parser_block.py,sha256=uYcof6cy1tgMG_mHpFBW4xhVXg40rNhTerM5EqAvcEo,14187
 sdg_hub/core/blocks/transform/__init__.py,sha256=Y_3izPCtgnMbFK-gBMeLHZspSrNLgbGheAJXU57XfFw,746
 sdg_hub/core/blocks/transform/duplicate_columns.py,sha256=SaP7rIF4ZFEFFa50aU2xGNIuddXaEZrKxdWfHjzFpVI,2833
-sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=mGup5agvDf9kAFSvXE5X6Puo6CQc9UOdFdbhdFWJjwk,8225
+sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=XC_a7Skbd3mu7f4ra8fGWPxMwqUMSjJkQ7Ag7vflwJA,8235
 sdg_hub/core/blocks/transform/melt_columns.py,sha256=vaYa5Taq6GhNZYWFL4uPK3-SfN2BsKEm-wvjd2EYYoI,4382
 sdg_hub/core/blocks/transform/rename_columns.py,sha256=qeB5L2utqDQnutUetH1VKZSqDiJSH_yUp5EFCV-XCVI,1998
 sdg_hub/core/blocks/transform/text_concat.py,sha256=_-B__Hob1WwgwkILPIZvTnsDzuwtoX1hKviyzHlnnes,3149
 sdg_hub/core/blocks/transform/uniform_col_val_setter.py,sha256=XnjiT29z3PzIPy8M-mmE2w-Miab6Ed5ahy32SaxTCTE,3263
 sdg_hub/core/flow/__init__.py,sha256=N2NZGngvd7qpT5FI_knKukUFM0IkD9K5jdTi-gDeUI4,475
-sdg_hub/core/flow/base.py,sha256=Jm90xQ1ns0ArEiqkceSME6phzBtkw6nthjSJNTU3IkQ,45530
+sdg_hub/core/flow/base.py,sha256=eneLS9GR21q9nK3M8qZzIyJ-OeFF2Lp6ZwzQjBVnbyk,50364
 sdg_hub/core/flow/checkpointer.py,sha256=stm5ZtjjEiLk9ZkAAnoQQn5Y8Yl_d7qCsQLZTrCXR48,11867
 sdg_hub/core/flow/metadata.py,sha256=h9jpvAzWsF5n4ztZMzwa9ZNgnzKTHmFWdn7YbyJLHCw,12977
 sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d4,7558
 sdg_hub/core/flow/registry.py,sha256=DzCqEEgwhvwnCBAGLogoMVdwXh4pCHrxOWqoxam7O8I,12162
 sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
 sdg_hub/core/utils/__init__.py,sha256=C2FzLn3dHprwGJDEgI4fyFS3aoCJR-9PhHsunxropJ8,351
-sdg_hub/core/utils/datautils.py,sha256=QnzMl7nOp0crNJEWgAqurOuuAyz0SnvAjLiKzvG0uds,1933
+sdg_hub/core/utils/datautils.py,sha256=vvZSNZ94vMQMh9Bs99X92UPwSNzyyYwO3V4w3O3QYoA,2801
 sdg_hub/core/utils/error_handling.py,sha256=yku8cGj_nKCyXDsnb-mHCpgukkkAMucJ4iAUrIzqysc,5510
 sdg_hub/core/utils/flow_id_words.yaml,sha256=5QHpQdP7zwahRuooyAlJIwBY7WcDR7vtbJXxVJqujbg,2317
 sdg_hub/core/utils/flow_identifier.py,sha256=aAHfK_G9AwEtMglLRMdMpi_AI1dciub5UqBGm4yb2HE,2841
@@ -59,10 +59,10 @@ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/ev
 sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml,sha256=zwzklXup6khRkR88avgrJTcjaMcV1wnbeYaML5oPuNs,1767
 sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml,sha256=cA8igo7jMrRXaWW6k0of6KOp7YnxLtPj0fP4DbrmZNQ,3647
 sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml,sha256=fcMV7LaCFZo4D29nwhGJXqFFuZMYVLo9XYjv8zcU6zs,364
-sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=Rrl9eve9QsGLojAkflgKTHyUgUawKfvhEVAnAxBLZJ8,6307
+sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=oyrLRjEnmioMa_G_sd9yQK_nBt4arwWV5fvKgzYE2ds,6090
 sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml,sha256=yX8aLY8dJSDML9ZJhnj9RzPbN8tH2xfcM4Gc6xZuwqQ,2596
-sdg_hub-0.2.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sdg_hub-0.2.1.dist-info/METADATA,sha256=0Si2PZotpwtUI2Pg2cc3uSZIJtS12jF4VInJSTyBngA,8606
-sdg_hub-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-sdg_hub-0.2.1.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
-sdg_hub-0.2.1.dist-info/RECORD,,
+sdg_hub-0.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sdg_hub-0.2.2.dist-info/METADATA,sha256=OHIiUh4AqSHVW-asGQdUp67TXL_dCRV7NVC0E14IwM0,9647
+sdg_hub-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sdg_hub-0.2.2.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
+sdg_hub-0.2.2.dist-info/RECORD,,