sdg-hub 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. sdg_hub/_version.py +16 -3
  2. sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
  3. sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +175 -416
  4. sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +174 -415
  5. sdg_hub/core/blocks/evaluation/verify_question_block.py +180 -415
  6. sdg_hub/core/blocks/llm/__init__.py +2 -0
  7. sdg_hub/core/blocks/llm/client_manager.py +61 -24
  8. sdg_hub/core/blocks/llm/config.py +1 -0
  9. sdg_hub/core/blocks/llm/llm_chat_block.py +62 -7
  10. sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +653 -0
  11. sdg_hub/core/blocks/llm/text_parser_block.py +75 -30
  12. sdg_hub/core/blocks/registry.py +49 -35
  13. sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
  14. sdg_hub/core/flow/base.py +370 -20
  15. sdg_hub/core/flow/checkpointer.py +333 -0
  16. sdg_hub/core/flow/metadata.py +45 -0
  17. sdg_hub/core/flow/migration.py +12 -1
  18. sdg_hub/core/flow/registry.py +121 -58
  19. sdg_hub/core/flow/validation.py +12 -0
  20. sdg_hub/core/utils/__init__.py +2 -1
  21. sdg_hub/core/utils/datautils.py +81 -1
  22. sdg_hub/core/utils/flow_id_words.yaml +231 -0
  23. sdg_hub/core/utils/flow_identifier.py +94 -0
  24. sdg_hub/core/utils/yaml_utils.py +59 -0
  25. sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +1 -7
  26. {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/METADATA +59 -31
  27. {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/RECORD +30 -25
  28. {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/WHEEL +0 -0
  29. {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/licenses/LICENSE +0 -0
  30. {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,94 @@
1
+ # Standard
2
+ from pathlib import Path
3
+ from typing import Dict, List
4
+ import hashlib
5
+ import random
6
+
7
+ # Third Party
8
+ import yaml
9
+
10
+ # Cache for loaded word lists to avoid repeated file I/O
11
+ _WORD_CACHE: Dict[str, List[str]] = {}
12
+
13
+
14
def _load_word_lists() -> Dict[str, List[str]]:
    """Load the adjective/noun word lists used to build flow identifiers.

    The lists are read from ``flow_id_words.yaml`` located next to this module
    and memoized in the module-level ``_WORD_CACHE`` so the file is parsed at
    most once per process. If the file does not exist, a small built-in
    fallback vocabulary is cached and returned instead of raising.

    Returns:
        Dictionary containing 'adjectives' and 'nouns' lists

    Raises:
        yaml.YAMLError: If the YAML file is malformed, or its top level is not
            a mapping (for example, the file is empty)
        KeyError: If the 'adjectives' or 'nouns' key is missing
    """
    global _WORD_CACHE

    if _WORD_CACHE:
        return _WORD_CACHE

    # Get path to word list file relative to this module
    words_file = Path(__file__).parent / "flow_id_words.yaml"

    # Keep the try body limited to the operations that can actually raise the
    # exceptions handled below.
    try:
        with open(words_file, "r", encoding="utf-8") as f:
            word_data = yaml.safe_load(f)
    except FileNotFoundError:
        # Fallback to minimal word lists if configuration file is not found
        _WORD_CACHE = {
            "adjectives": ["bright", "calm", "fast", "smart", "quick"],
            "nouns": ["river", "star", "cloud", "moon", "rock"],
        }
        return _WORD_CACHE
    except yaml.YAMLError as e:
        # Chain the original error so the parser's traceback is preserved.
        raise yaml.YAMLError(f"Error parsing word list YAML: {e}") from e

    # safe_load returns None for an empty file and may return a list or scalar;
    # subscripting those by key would raise an undocumented TypeError.
    if not isinstance(word_data, dict):
        raise yaml.YAMLError(
            "Error parsing word list YAML: expected a mapping at the top "
            f"level, got {type(word_data).__name__}"
        )

    try:
        loaded = {
            "adjectives": word_data["adjectives"],
            "nouns": word_data["nouns"],
        }
    except KeyError as e:
        raise KeyError(f"Missing required key in word list YAML: {e}") from e

    # Only publish to the cache once both lists were read successfully.
    _WORD_CACHE = loaded
    return _WORD_CACHE
55
+
56
+
57
def get_flow_identifier(name: str) -> str:
    """Derive a stable, human-readable identifier from a flow name.

    The identifier has the wandb-style shape "adjective-noun-number". It is
    computed from a SHA-256 hash of ``name``, so the same name combined with
    the same word lists yields the same identifier on every call.

    Args:
        name: Flow name to generate identifier from

    Returns:
        A string in the format "adjective-noun-number", where the number is
        between 1 and 999 (e.g., "bright-river-123")

    Raises:
        yaml.YAMLError: If the word list YAML file is malformed
        KeyError: If the word lists lack an 'adjectives' or 'nouns' entry
    """
    vocabulary = _load_word_lists()

    # The first 4 digest bytes read big-endian are the same integer as the
    # first 8 hex characters of the hex digest.
    digest = hashlib.sha256(name.encode()).digest()
    generator = random.Random(int.from_bytes(digest[:4], "big"))

    # The draw order (adjective, noun, number) determines the result and must
    # stay fixed, otherwise existing identifiers would change.
    parts = (
        generator.choice(vocabulary["adjectives"]),
        generator.choice(vocabulary["nouns"]),
        generator.randint(1, 999),
    )
    return "{}-{}-{}".format(*parts)
@@ -0,0 +1,59 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """YAML utilities for flow configuration."""
3
+
4
+ # Standard
5
+ from pathlib import Path
6
+ from typing import Any, Dict
7
+
8
+ # Third Party
9
+ import yaml
10
+
11
+ # Local
12
+ from .logger_config import setup_logger
13
+
14
+ logger = setup_logger(__name__)
15
+
16
+
17
def save_flow_yaml(
    yaml_path: str,
    flow_config: Dict[str, Any],
    reason: str = "",
    sort_keys: bool = False,
    width: int = 240,
    indent: int = 2,
) -> None:
    """
    Write a flow configuration to disk as block-style YAML.

    The target file is opened for writing with UTF-8 encoding, replacing any
    existing content, and a debug-level log message records the save.

    Parameters
    ----------
    yaml_path : str
        Path to the YAML file to write.
    flow_config : Dict[str, Any]
        Flow configuration to save.
    reason : str, optional
        Reason for saving; appended in parentheses to the log message when
        non-empty.
    sort_keys : bool, optional
        Forwarded to ``yaml.dump`` as ``sort_keys``.
    width : int, optional
        Maximum line width for YAML output.
    indent : int, optional
        Indentation level for YAML output.
    """
    # Round-trip through Path so the path is normalized before use and logging.
    target = str(Path(yaml_path))

    dump_options = {
        "default_flow_style": False,
        "sort_keys": sort_keys,
        "width": width,
        "indent": indent,
    }
    with open(target, "w", encoding="utf-8") as stream:
        yaml.dump(flow_config, stream, **dump_options)

    message = f"Saved flow configuration to YAML: {target}"
    if reason:
        message = f"{message} ({reason})"
    logger.debug(message)
@@ -1,4 +1,5 @@
1
1
  metadata:
2
+ id: small-rock-799
2
3
  name: "Advanced Document Grounded Question-Answer Generation Flow for Knowledge Tuning"
3
4
  description: "A comprehensive flow that generates high-quality question-answer pairs from input documents using multiple LLM blocks for question generation, answer synthesis, and quality evaluation."
4
5
  version: "1.0.0"
@@ -45,7 +46,6 @@ blocks:
45
46
  input_cols: [document, document_outline]
46
47
  output_cols: summary_prompt
47
48
  prompt_config_path: detailed_summary.yaml
48
- format_as_messages: true
49
49
 
50
50
  - block_type: LLMChatBlock
51
51
  block_config:
@@ -69,7 +69,6 @@ blocks:
69
69
  input_cols: [document, document_outline, domain]
70
70
  output_cols: atomic_facts_prompt
71
71
  prompt_config_path: atomic_facts.yaml
72
- format_as_messages: true
73
72
 
74
73
  - block_type: LLMChatBlock
75
74
  block_config:
@@ -93,7 +92,6 @@ blocks:
93
92
  input_cols: [document, document_outline]
94
93
  output_cols: extractive_summary_prompt
95
94
  prompt_config_path: extractive_summary.yaml
96
- format_as_messages: true
97
95
 
98
96
  - block_type: LLMChatBlock
99
97
  block_config:
@@ -128,7 +126,6 @@ blocks:
128
126
  input_cols: [domain, document, document_outline, icl_document, icl_query_1, icl_response_1, icl_query_2, icl_response_2, icl_query_3, icl_response_3]
129
127
  output_cols: knowledge_generation_prompt
130
128
  prompt_config_path: generate_questions_responses.yaml
131
- format_as_messages: true
132
129
 
133
130
  - block_type: LLMChatBlock
134
131
  block_config:
@@ -156,7 +153,6 @@ blocks:
156
153
  filter_value: "YES"
157
154
  operation: eq
158
155
  async_mode: true
159
- format_as_messages: true
160
156
  start_tags: ["[Start of Explanation]", "[Start of Answer]"]
161
157
  end_tags: ["[End of Explanation]", "[End of Answer]"]
162
158
 
@@ -171,7 +167,6 @@ blocks:
171
167
  convert_dtype: float
172
168
  max_tokens: 2048
173
169
  async_mode: true
174
- format_as_messages: true
175
170
  start_tags: ["[Start of Feedback]", "[Start of Score]"]
176
171
  end_tags: ["[End of Feedback]", "[End of Score]"]
177
172
 
@@ -186,6 +181,5 @@ blocks:
186
181
  convert_dtype: float
187
182
  max_tokens: 2048
188
183
  async_mode: true
189
- format_as_messages: true
190
184
  start_tags: ["[Start of Explanation]", "[Start of Rating]"]
191
185
  end_tags: ["[End of Explanation]", "[End of Rating]"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sdg_hub
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Synthetic Data Generation
5
5
  Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
6
6
  License: Apache-2.0
@@ -27,7 +27,6 @@ Requires-Dist: datasets<4.0.0,>=2.18.0
27
27
  Requires-Dist: httpx<1.0.0,>=0.25.0
28
28
  Requires-Dist: jinja2
29
29
  Requires-Dist: litellm<1.75.0,>=1.73.0
30
- Requires-Dist: openai<2.0.0,>=1.13.3
31
30
  Requires-Dist: rich
32
31
  Requires-Dist: pydantic<3.0.0,>=2.0.0
33
32
  Requires-Dist: python-dotenv<2.0.0,>=1.0.0
@@ -92,6 +91,8 @@ A modular Python framework for building synthetic data generation pipelines usin
92
91
 
93
92
  **📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.
94
93
 
94
+ **📋 Dataset Schema Discovery** - Instantly discover required data formats. Get empty datasets with correct schema for easy validation and data preparation.
95
+
95
96
  **🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.
96
97
 
97
98
 
@@ -121,7 +122,7 @@ uv pip install sdg-hub[examples]
121
122
 
122
123
  ## 🚀 Quick Start
123
124
 
124
- ### 🧱 Core Concepts
125
+ ### Core Concepts
125
126
 
126
127
  **Blocks** are composable units that transform datasets - think of them as data processing Lego pieces. Each block performs a specific task: LLM chat, text parsing, evaluation, or transformation.
127
128
 
@@ -136,7 +137,7 @@ dataset → Block₁ → Block₂ → Block₃ → enriched_dataset
136
137
 
137
138
  #### Flow Discovery
138
139
  ```python
139
- from sdg_hub import FlowRegistry
140
+ from sdg_hub import FlowRegistry, Flow
140
141
 
141
142
  # Auto-discover all available flows (no setup needed!)
142
143
  FlowRegistry.discover_flows()
@@ -150,16 +151,20 @@ qa_flows = FlowRegistry.search_flows(tag="question-generation")
150
151
  print(f"QA flows: {qa_flows}")
151
152
  ```
152
153
 
153
- #### Using Flows
154
+ Each flow has a **unique, human-readable ID** automatically generated from its name. These IDs provide a convenient shorthand for referencing flows:
155
+
154
156
  ```python
155
- from sdg_hub import FlowRegistry, Flow
156
- from datasets import Dataset
157
+ # Every flow gets a deterministic ID
158
+ # Same flow name always generates the same ID
159
+ flow_id = "small-rock-799"
157
160
 
158
- # Load the flow by name
159
- flow_name = "Advanced Document Grounded Question-Answer Generation Flow for Knowledge Tuning"
160
- flow_path = FlowRegistry.get_flow_path(flow_name)
161
+ # Use ID to reference the flow
162
+ flow_path = FlowRegistry.get_flow_path(flow_id)
161
163
  flow = Flow.from_yaml(flow_path)
164
+ ```
162
165
 
166
+ #### Discovering and Configuring Models
167
+ ```python
163
168
  # Discover recommended models
164
169
  default_model = flow.get_default_model()
165
170
  recommendations = flow.get_model_recommendations()
@@ -171,21 +176,52 @@ flow.set_model_config(
171
176
  api_base="http://localhost:8000/v1",
172
177
  api_key="your_key",
173
178
  )
174
-
175
- # Create your dataset with required columns
176
- dataset = Dataset.from_dict({
177
- 'document': ['Your document text here...'],
178
- 'document_outline': ['1. Topic A; 2. Topic B; 3. Topic C'],
179
- 'domain': ['Computer Science'],
180
- 'icl_document': ['Example document for in-context learning...'],
181
- 'icl_query_1': ['Example question 1?'],
182
- 'icl_response_1': ['Example answer 1'],
183
- 'icl_query_2': ['Example question 2?'],
184
- 'icl_response_2': ['Example answer 2'],
185
- 'icl_query_3': ['Example question 3?'],
186
- 'icl_response_3': ['Example answer 3']
179
+ ```
180
+ #### Discover dataset requirements and create your dataset
181
+ ```python
182
+ # First, discover what data the flow needs
183
+ # Get an empty dataset with the exact schema needed
184
+ schema_dataset = flow.get_dataset_schema() # Get empty dataset with correct schema
185
+ print(f"Required columns: {schema_dataset.column_names}")
186
+ print(f"Schema: {schema_dataset.features}")
187
+
188
+ # Option 1: Add data directly to the schema dataset
189
+ dataset = schema_dataset.add_item({
190
+ 'document': 'Your document text here...',
191
+ 'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
192
+ 'domain': 'Computer Science',
193
+ 'icl_document': 'Example document for in-context learning...',
194
+ 'icl_query_1': 'Example question 1?',
195
+ 'icl_response_1': 'Example answer 1',
196
+ 'icl_query_2': 'Example question 2?',
197
+ 'icl_response_2': 'Example answer 2',
198
+ 'icl_query_3': 'Example question 3?',
199
+ 'icl_response_3': 'Example answer 3'
187
200
  })
188
201
 
202
+ # Option 2: Create your own dataset (requires `from datasets import Dataset`) and validate the schema
203
+ my_dataset = Dataset.from_dict(my_data_dict)
204
+ if my_dataset.features == schema_dataset.features:
205
+ print("✅ Schema matches - ready to generate!")
206
+ dataset = my_dataset
207
+ else:
208
+ print("❌ Schema mismatch - check your columns")
209
+
210
+ # Option 3: Get raw requirements for detailed inspection
211
+ requirements = flow.get_dataset_requirements()
212
+ if requirements:
213
+ print(f"Required: {requirements.required_columns}")
214
+ print(f"Optional: {requirements.optional_columns}")
215
+ print(f"Min samples: {requirements.min_samples}")
216
+ ```
217
+
218
+ #### Dry Run and Generate
219
+ ```python
220
+ # Quick Testing with Dry Run
221
+ dry_result = flow.dry_run(dataset, sample_size=1)
222
+ print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
223
+ print(f"Output columns: {dry_result['final_dataset']['columns']}")
224
+
189
225
  # Generate high-quality QA pairs
190
226
  result = flow.generate(dataset)
191
227
 
@@ -196,14 +232,6 @@ faithfulness_scores = result['faithfulness_judgment']
196
232
  relevancy_scores = result['relevancy_score']
197
233
  ```
198
234
 
199
- #### Quick Testing with Dry Run
200
- ```python
201
- # Test the flow with a small sample first
202
- dry_result = flow.dry_run(dataset, sample_size=1)
203
- print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
204
- print(f"Output columns: {dry_result['final_dataset']['columns']}")
205
- ```
206
-
207
235
 
208
236
  ## 📄 License
209
237
 
@@ -1,10 +1,10 @@
1
1
  sdg_hub/__init__.py,sha256=Tw-6R5a8_W1kJcTAsW3R9ltBDP1dy5-fe7Tvt3cSyCQ,550
2
- sdg_hub/_version.py,sha256=iB5DfB5V6YB5Wo4JmvS-txT42QtmGaWcWp3udRT7zCI,511
2
+ sdg_hub/_version.py,sha256=o3ZTescp-19Z9cvBGq9dQnbppljgzdUYUf98Nov0spY,704
3
3
  sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  sdg_hub/core/__init__.py,sha256=NwqB4fwhC29W50VW7QXZssLxx122YvgO9LHDLdgAnrI,496
5
5
  sdg_hub/core/blocks/__init__.py,sha256=9sCkCvDQzJGSedaePVlEIpbNwrkBz_K500VW_6FLhuE,1601
6
6
  sdg_hub/core/blocks/base.py,sha256=TrzUAkG7Tiquk0Z3SOFsb5mRnHd1IbHH6gFPVH1P7T8,10424
7
- sdg_hub/core/blocks/registry.py,sha256=a9CcjA5n7JWmfTyeQPml14aW0tlYU9QLkSkskKWJT2o,9771
7
+ sdg_hub/core/blocks/registry.py,sha256=FuEN_pnq-nSH1LguY3_oCubT6Kz3SuJjk3TcUpLT-lw,10695
8
8
  sdg_hub/core/blocks/deprecated_blocks/__init__.py,sha256=RDu3MWFStDQko-TKkx8tGoB1UTatP_RSldZK43zHDvY,889
9
9
  sdg_hub/core/blocks/deprecated_blocks/combine_columns.py,sha256=HCvpaYsAwgx1Dm0vIshcWsKoVsRT0KrmKp9j4oqtByc,2757
10
10
  sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py,sha256=maCaaEs0EMMzt7L1xm7fAH3ylaFMHEkeC_dtOw3FrjU,2694
@@ -13,39 +13,44 @@ sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py,sha256=IenCskrPEv09h2uT
13
13
  sdg_hub/core/blocks/deprecated_blocks/llmblock.py,sha256=34lzC43BODpMk5AwlWA1ctdYPmN7cA6WL5vMXaI0P0Y,20385
14
14
  sdg_hub/core/blocks/deprecated_blocks/rename_columns.py,sha256=thp-mHtkRmUw_nYKpldy_mLWR2AvC5YUhbqDETM6-T0,2620
15
15
  sdg_hub/core/blocks/deprecated_blocks/sample_populator.py,sha256=UdueMApxOmPWaxxMrw7b1v74fKJBfqqRATEBqgmVtNw,1737
16
- sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=ABcXZrqEMsgKfdGAkSo2plMp4LsZSqPhEQugoDEYm1I,2950
16
+ sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=nWecsVsW8DvBcqAF_LOqXmW-5MQ28uN3d1y6wkSy38c,2960
17
17
  sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py,sha256=44TQu-rK5isia-otMVB1zHd8D-wWmu3C8CI1NLtfY5s,2729
18
18
  sdg_hub/core/blocks/evaluation/__init__.py,sha256=kFXee-vsVVdU2XtLio9qHgPx_a0zoB_rQr509EKBGJc,357
19
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=ZuQ8jq2JwTdslUJtFi1E9NXebCWFZS8isXOafcJ_CMU,23026
20
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=ieQRwl4bx5EQ3m7Wa2P3pHLUPQY7HuwNWjHUCo98u6g,22832
21
- sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=fSNbW1KpdfVE0fQsm4Y8QfVk6A3J5H3C0dtGn49t8tM,22853
19
+ sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=vFi3YIxVPNnzgdenIeAl7yUb4OOUY_uUOXS-pWLsDmw,12223
20
+ sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=NXT1lixR-JnOXNlBCbMjULcpu4kh2SthhwCWEobiBt0,12115
21
+ sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=LKoIHdxUuTVO24n_M9cAliEj56uEe2kQAecKTRz65zI,12465
22
22
  sdg_hub/core/blocks/filtering/__init__.py,sha256=isxSVSvDqkMjG8dQSl3Q2M4g5c1t9fTjBSA21icf-yA,275
23
23
  sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=H8Gif0q9Wc_d1TnVow8Zpsg7blJOFGN1EZmV6OPpkcg,5971
24
- sdg_hub/core/blocks/llm/__init__.py,sha256=qAb-pzbI3EqjOVjU48Y63cR3Oly5ZjCkhdwkk1ltqTc,732
25
- sdg_hub/core/blocks/llm/client_manager.py,sha256=vaoPoTITJ9IlooeVRfu6M4WBc08mp4aJZ5tvnl2fMv8,12309
26
- sdg_hub/core/blocks/llm/config.py,sha256=TmbfqxPHH3mShTK2EuCX2AGKtDvl0aSvihsaqgzABtM,11266
24
+ sdg_hub/core/blocks/llm/__init__.py,sha256=N6-Prgd4X85oWbMQzhYMrq7OX-NTJm57cghowK-val0,844
25
+ sdg_hub/core/blocks/llm/client_manager.py,sha256=PDf07t2s68WQaoU-LTRke6nQUDWDKwiV4ptAT7Cbn18,14047
26
+ sdg_hub/core/blocks/llm/config.py,sha256=gc4xp5D20MSlKMFEos0QAaKUwgbZpBtMGXmn6LsIk78,11289
27
27
  sdg_hub/core/blocks/llm/error_handler.py,sha256=7T-019ZFB9qgZoX1ybIiXyaLjPzrF96qcKmUu6vmO6g,12178
28
- sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=3o2oV_ecWsEHFp5FWPIpBT-yJ1imJmeZy2b9GZL-T54,20121
28
+ sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=J-iDJTY-txuduFXx-NUhyohWpRnEaYIg4f0VsSgpjVw,22641
29
+ sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=H7LqYpEiFO1g2cxncAl4vkLhQxAYgGpV6cUyQTSG03k,27628
29
30
  sdg_hub/core/blocks/llm/prompt_builder_block.py,sha256=fkJd718X1oYlMY1cjo_8WCO16Gl8Tm0bUPWR78E_uws,13935
30
- sdg_hub/core/blocks/llm/text_parser_block.py,sha256=9n6pHKVmMD1wwEYdFs0kIz5TblmDxl5dtmbyLZHGivo,12005
31
+ sdg_hub/core/blocks/llm/text_parser_block.py,sha256=uYcof6cy1tgMG_mHpFBW4xhVXg40rNhTerM5EqAvcEo,14187
31
32
  sdg_hub/core/blocks/transform/__init__.py,sha256=Y_3izPCtgnMbFK-gBMeLHZspSrNLgbGheAJXU57XfFw,746
32
33
  sdg_hub/core/blocks/transform/duplicate_columns.py,sha256=SaP7rIF4ZFEFFa50aU2xGNIuddXaEZrKxdWfHjzFpVI,2833
33
- sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=mGup5agvDf9kAFSvXE5X6Puo6CQc9UOdFdbhdFWJjwk,8225
34
+ sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=XC_a7Skbd3mu7f4ra8fGWPxMwqUMSjJkQ7Ag7vflwJA,8235
34
35
  sdg_hub/core/blocks/transform/melt_columns.py,sha256=vaYa5Taq6GhNZYWFL4uPK3-SfN2BsKEm-wvjd2EYYoI,4382
35
36
  sdg_hub/core/blocks/transform/rename_columns.py,sha256=qeB5L2utqDQnutUetH1VKZSqDiJSH_yUp5EFCV-XCVI,1998
36
37
  sdg_hub/core/blocks/transform/text_concat.py,sha256=_-B__Hob1WwgwkILPIZvTnsDzuwtoX1hKviyzHlnnes,3149
37
38
  sdg_hub/core/blocks/transform/uniform_col_val_setter.py,sha256=XnjiT29z3PzIPy8M-mmE2w-Miab6Ed5ahy32SaxTCTE,3263
38
39
  sdg_hub/core/flow/__init__.py,sha256=N2NZGngvd7qpT5FI_knKukUFM0IkD9K5jdTi-gDeUI4,475
39
- sdg_hub/core/flow/base.py,sha256=0sx_chQIeuBcLH1fNMkkD0PxX5UeEv_pCBxYI0Byzi8,36884
40
- sdg_hub/core/flow/metadata.py,sha256=_IfFWtCukYoMMG2QWRganUl0uGQO_jxniIVBlVmutus,11487
41
- sdg_hub/core/flow/migration.py,sha256=g0Ug4ZrR_ssxJ-ESVP7ubkD0kql6aSChOuMmx-ZMn8A,7198
42
- sdg_hub/core/flow/registry.py,sha256=T2veU05h4Q9vb_6F_NYHnNuFZE21orWsx1-iGl0aoJk,9564
43
- sdg_hub/core/flow/validation.py,sha256=g0G7MH3bz7kcNsfRrlSi8iJZi8gqVcgODhHygVYtJVI,9185
44
- sdg_hub/core/utils/__init__.py,sha256=y_D7HcRxw7FXShw5USQpCt-5h4VXOFFvMOMN3_oALiw,279
45
- sdg_hub/core/utils/datautils.py,sha256=qKK2HXAqI4t-O-9RMu2DdaQVZwTnJj-W7-Hc5o1iqZw,379
40
+ sdg_hub/core/flow/base.py,sha256=eneLS9GR21q9nK3M8qZzIyJ-OeFF2Lp6ZwzQjBVnbyk,50364
41
+ sdg_hub/core/flow/checkpointer.py,sha256=stm5ZtjjEiLk9ZkAAnoQQn5Y8Yl_d7qCsQLZTrCXR48,11867
42
+ sdg_hub/core/flow/metadata.py,sha256=h9jpvAzWsF5n4ztZMzwa9ZNgnzKTHmFWdn7YbyJLHCw,12977
43
+ sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d4,7558
44
+ sdg_hub/core/flow/registry.py,sha256=DzCqEEgwhvwnCBAGLogoMVdwXh4pCHrxOWqoxam7O8I,12162
45
+ sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
46
+ sdg_hub/core/utils/__init__.py,sha256=C2FzLn3dHprwGJDEgI4fyFS3aoCJR-9PhHsunxropJ8,351
47
+ sdg_hub/core/utils/datautils.py,sha256=vvZSNZ94vMQMh9Bs99X92UPwSNzyyYwO3V4w3O3QYoA,2801
46
48
  sdg_hub/core/utils/error_handling.py,sha256=yku8cGj_nKCyXDsnb-mHCpgukkkAMucJ4iAUrIzqysc,5510
49
+ sdg_hub/core/utils/flow_id_words.yaml,sha256=5QHpQdP7zwahRuooyAlJIwBY7WcDR7vtbJXxVJqujbg,2317
50
+ sdg_hub/core/utils/flow_identifier.py,sha256=aAHfK_G9AwEtMglLRMdMpi_AI1dciub5UqBGm4yb2HE,2841
47
51
  sdg_hub/core/utils/logger_config.py,sha256=MPYdpyNXh_pxFUOAvSCHa98LGjxjaLXoUoqWekqTG4s,422
48
52
  sdg_hub/core/utils/path_resolution.py,sha256=yWof4kGNpQ5dKcrVHg0h9KfOKLZ6ROjdfsLAZsQT5rM,2000
53
+ sdg_hub/core/utils/yaml_utils.py,sha256=tShCd-FFkp0xlKnLe7dXsMOR4AvT9d2qRUmu4ZnPSEY,1458
49
54
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
55
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
56
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml,sha256=xgUNY793y4lcpdtuWm5Ah1CmbU2gvvPQCpZMMa6kPXU,2447
@@ -54,10 +59,10 @@ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/ev
54
59
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml,sha256=zwzklXup6khRkR88avgrJTcjaMcV1wnbeYaML5oPuNs,1767
55
60
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml,sha256=cA8igo7jMrRXaWW6k0of6KOp7YnxLtPj0fP4DbrmZNQ,3647
56
61
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml,sha256=fcMV7LaCFZo4D29nwhGJXqFFuZMYVLo9XYjv8zcU6zs,364
57
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=RrWr2jaandGgLkJiBLFPPA1g6B6vmL98-qXPozqjHKQ,6286
62
+ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=oyrLRjEnmioMa_G_sd9yQK_nBt4arwWV5fvKgzYE2ds,6090
58
63
  sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml,sha256=yX8aLY8dJSDML9ZJhnj9RzPbN8tH2xfcM4Gc6xZuwqQ,2596
59
- sdg_hub-0.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
- sdg_hub-0.2.0.dist-info/METADATA,sha256=APjsGUk94_tQRVlncgVxkEOTSOpHY25SOMmOO1lt0P0,8464
61
- sdg_hub-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
62
- sdg_hub-0.2.0.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
63
- sdg_hub-0.2.0.dist-info/RECORD,,
64
+ sdg_hub-0.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
65
+ sdg_hub-0.2.2.dist-info/METADATA,sha256=OHIiUh4AqSHVW-asGQdUp67TXL_dCRV7NVC0E14IwM0,9647
66
+ sdg_hub-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
67
+ sdg_hub-0.2.2.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
68
+ sdg_hub-0.2.2.dist-info/RECORD,,