sdg-hub 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +16 -3
- sdg_hub/core/blocks/deprecated_blocks/selector.py +1 -1
- sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py +175 -416
- sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py +174 -415
- sdg_hub/core/blocks/evaluation/verify_question_block.py +180 -415
- sdg_hub/core/blocks/llm/__init__.py +2 -0
- sdg_hub/core/blocks/llm/client_manager.py +61 -24
- sdg_hub/core/blocks/llm/config.py +1 -0
- sdg_hub/core/blocks/llm/llm_chat_block.py +62 -7
- sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py +653 -0
- sdg_hub/core/blocks/llm/text_parser_block.py +75 -30
- sdg_hub/core/blocks/registry.py +49 -35
- sdg_hub/core/blocks/transform/index_based_mapper.py +1 -1
- sdg_hub/core/flow/base.py +370 -20
- sdg_hub/core/flow/checkpointer.py +333 -0
- sdg_hub/core/flow/metadata.py +45 -0
- sdg_hub/core/flow/migration.py +12 -1
- sdg_hub/core/flow/registry.py +121 -58
- sdg_hub/core/flow/validation.py +12 -0
- sdg_hub/core/utils/__init__.py +2 -1
- sdg_hub/core/utils/datautils.py +81 -1
- sdg_hub/core/utils/flow_id_words.yaml +231 -0
- sdg_hub/core/utils/flow_identifier.py +94 -0
- sdg_hub/core/utils/yaml_utils.py +59 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +1 -7
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/METADATA +59 -31
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/RECORD +30 -25
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/WHEEL +0 -0
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.2.0.dist-info → sdg_hub-0.2.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
# Standard
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Dict, List
|
4
|
+
import hashlib
|
5
|
+
import random
|
6
|
+
|
7
|
+
# Third Party
|
8
|
+
import yaml
|
9
|
+
|
10
|
+
# Cache for loaded word lists to avoid repeated file I/O
_WORD_CACHE: Dict[str, List[str]] = {}


def _load_word_lists() -> Dict[str, List[str]]:
    """Load the adjective and noun word lists used for flow identifiers.

    The lists are read from ``flow_id_words.yaml`` located next to this
    module and stored in the module-level ``_WORD_CACHE``; once the cache is
    non-empty, later calls return it without touching the file again.

    If the word list file does not exist, a small built-in fallback is cached
    and returned instead of raising.

    Returns:
        Dictionary containing 'adjectives' and 'nouns' lists

    Raises:
        yaml.YAMLError: If the YAML file is malformed
        TypeError: If the top-level YAML value is not a mapping
            (for example, when the file is empty)
        KeyError: If the 'adjectives' or 'nouns' key is missing
    """
    global _WORD_CACHE

    if _WORD_CACHE:
        return _WORD_CACHE

    # Get path to word list file relative to this module
    words_file = Path(__file__).parent / "flow_id_words.yaml"

    try:
        with open(words_file, "r", encoding="utf-8") as f:
            word_data = yaml.safe_load(f)

        # An empty file loads as None and a top-level sequence as a list;
        # fail with a clear message instead of an opaque subscript error.
        if not isinstance(word_data, dict):
            raise TypeError(
                "Word list YAML must contain a mapping with 'adjectives' and "
                f"'nouns' keys, got {type(word_data).__name__}"
            )

        _WORD_CACHE = {
            "adjectives": word_data["adjectives"],
            "nouns": word_data["nouns"],
        }

        return _WORD_CACHE

    except FileNotFoundError:
        # Fallback to minimal word lists if configuration file is not found.
        # The fallback is cached too, so the file is not retried afterwards.
        _WORD_CACHE = {
            "adjectives": ["bright", "calm", "fast", "smart", "quick"],
            "nouns": ["river", "star", "cloud", "moon", "rock"],
        }
        return _WORD_CACHE
    except yaml.YAMLError as e:
        # Chain explicitly so the original parser error stays attached.
        raise yaml.YAMLError(f"Error parsing word list YAML: {e}") from e
    except KeyError as e:
        raise KeyError(f"Missing required key in word list YAML: {e}") from e
|
55
|
+
|
56
|
+
|
57
|
+
def get_flow_identifier(name: str) -> str:
    """Derive a stable, human-readable identifier from a flow name.

    The identifier has the shape "adjective-noun-number". The SHA-256 digest
    of ``name`` seeds a private random generator, so for a given name and a
    given set of word lists the same identifier is produced on every call.

    Args:
        name: Flow name to generate identifier from

    Returns:
        A string in the format "adjective-noun-number", where the number is
        between 1 and 999 inclusive (illustrative shape: "bright-river-123")

    Raises:
        yaml.YAMLError: If the word list YAML file is malformed
        KeyError: If a required key is missing from the word list YAML
    """
    vocabulary = _load_word_lists()

    # The first 8 hex digits (32 bits) of the digest become the RNG seed.
    digest_prefix = hashlib.sha256(name.encode()).hexdigest()[:8]
    picker = random.Random(int(digest_prefix, 16))

    # Draw order is significant: adjective first, then noun, then number.
    parts = (
        picker.choice(vocabulary["adjectives"]),
        picker.choice(vocabulary["nouns"]),
        picker.randint(1, 999),
    )
    return "{}-{}-{}".format(*parts)
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
2
|
+
"""YAML utilities for flow configuration."""
|
3
|
+
|
4
|
+
# Standard
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Any, Dict
|
7
|
+
|
8
|
+
# Third Party
|
9
|
+
import yaml
|
10
|
+
|
11
|
+
# Local
|
12
|
+
from .logger_config import setup_logger
|
13
|
+
|
14
|
+
logger = setup_logger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
def save_flow_yaml(
    yaml_path: str,
    flow_config: Dict[str, Any],
    reason: str = "",
    sort_keys: bool = False,
    width: int = 240,
    indent: int = 2,
) -> None:
    """
    Write a flow configuration to a YAML file and log the save.

    The target file is opened in write mode (existing content is replaced)
    with UTF-8 encoding, and the configuration is dumped with
    ``default_flow_style=False``. A debug-level log message records the
    normalized path and, when given, the reason for saving.

    Parameters
    ----------
    yaml_path : str
        Path to the YAML file to write.
    flow_config : Dict[str, Any]
        Flow configuration to save.
    reason : str, optional
        Reason for saving; appended in parentheses to the log message
        when non-empty.
    sort_keys : bool, optional
        Passed through to ``yaml.dump`` as ``sort_keys``.
    width : int, optional
        Maximum line width for YAML output.
    indent : int, optional
        Indentation level for YAML output.
    """
    # Normalized spelling of the path; reused for the log line below.
    target = str(Path(yaml_path))

    dump_options = {
        "default_flow_style": False,
        "sort_keys": sort_keys,
        "width": width,
        "indent": indent,
    }
    with open(target, "w", encoding="utf-8") as stream:
        yaml.dump(flow_config, stream, **dump_options)

    suffix = f" ({reason})" if reason else ""
    logger.debug(f"Saved flow configuration to YAML: {target}{suffix}")
|
@@ -1,4 +1,5 @@
|
|
1
1
|
metadata:
|
2
|
+
id: small-rock-799
|
2
3
|
name: "Advanced Document Grounded Question-Answer Generation Flow for Knowledge Tuning"
|
3
4
|
description: "A comprehensive flow that generates high-quality question-answer pairs from input documents using multiple LLM blocks for question generation, answer synthesis, and quality evaluation."
|
4
5
|
version: "1.0.0"
|
@@ -45,7 +46,6 @@ blocks:
|
|
45
46
|
input_cols: [document, document_outline]
|
46
47
|
output_cols: summary_prompt
|
47
48
|
prompt_config_path: detailed_summary.yaml
|
48
|
-
format_as_messages: true
|
49
49
|
|
50
50
|
- block_type: LLMChatBlock
|
51
51
|
block_config:
|
@@ -69,7 +69,6 @@ blocks:
|
|
69
69
|
input_cols: [document, document_outline, domain]
|
70
70
|
output_cols: atomic_facts_prompt
|
71
71
|
prompt_config_path: atomic_facts.yaml
|
72
|
-
format_as_messages: true
|
73
72
|
|
74
73
|
- block_type: LLMChatBlock
|
75
74
|
block_config:
|
@@ -93,7 +92,6 @@ blocks:
|
|
93
92
|
input_cols: [document, document_outline]
|
94
93
|
output_cols: extractive_summary_prompt
|
95
94
|
prompt_config_path: extractive_summary.yaml
|
96
|
-
format_as_messages: true
|
97
95
|
|
98
96
|
- block_type: LLMChatBlock
|
99
97
|
block_config:
|
@@ -128,7 +126,6 @@ blocks:
|
|
128
126
|
input_cols: [domain, document, document_outline, icl_document, icl_query_1, icl_response_1, icl_query_2, icl_response_2, icl_query_3, icl_response_3]
|
129
127
|
output_cols: knowledge_generation_prompt
|
130
128
|
prompt_config_path: generate_questions_responses.yaml
|
131
|
-
format_as_messages: true
|
132
129
|
|
133
130
|
- block_type: LLMChatBlock
|
134
131
|
block_config:
|
@@ -156,7 +153,6 @@ blocks:
|
|
156
153
|
filter_value: "YES"
|
157
154
|
operation: eq
|
158
155
|
async_mode: true
|
159
|
-
format_as_messages: true
|
160
156
|
start_tags: ["[Start of Explanation]", "[Start of Answer]"]
|
161
157
|
end_tags: ["[End of Explanation]", "[End of Answer]"]
|
162
158
|
|
@@ -171,7 +167,6 @@ blocks:
|
|
171
167
|
convert_dtype: float
|
172
168
|
max_tokens: 2048
|
173
169
|
async_mode: true
|
174
|
-
format_as_messages: true
|
175
170
|
start_tags: ["[Start of Feedback]", "[Start of Score]"]
|
176
171
|
end_tags: ["[End of Feedback]", "[End of Score]"]
|
177
172
|
|
@@ -186,6 +181,5 @@ blocks:
|
|
186
181
|
convert_dtype: float
|
187
182
|
max_tokens: 2048
|
188
183
|
async_mode: true
|
189
|
-
format_as_messages: true
|
190
184
|
start_tags: ["[Start of Explanation]", "[Start of Rating]"]
|
191
185
|
end_tags: ["[End of Explanation]", "[End of Rating]"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: sdg_hub
|
3
|
-
Version: 0.2.0
|
3
|
+
Version: 0.2.2
|
4
4
|
Summary: Synthetic Data Generation
|
5
5
|
Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
|
6
6
|
License: Apache-2.0
|
@@ -27,7 +27,6 @@ Requires-Dist: datasets<4.0.0,>=2.18.0
|
|
27
27
|
Requires-Dist: httpx<1.0.0,>=0.25.0
|
28
28
|
Requires-Dist: jinja2
|
29
29
|
Requires-Dist: litellm<1.75.0,>=1.73.0
|
30
|
-
Requires-Dist: openai<2.0.0,>=1.13.3
|
31
30
|
Requires-Dist: rich
|
32
31
|
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
33
32
|
Requires-Dist: python-dotenv<2.0.0,>=1.0.0
|
@@ -92,6 +91,8 @@ A modular Python framework for building synthetic data generation pipelines usin
|
|
92
91
|
|
93
92
|
**📊 Rich Monitoring** - Detailed logging with progress bars and execution summaries.
|
94
93
|
|
94
|
+
**📋 Dataset Schema Discovery** - Instantly discover required data formats. Get empty datasets with correct schema for easy validation and data preparation.
|
95
|
+
|
95
96
|
**🧩 Easily Extensible** - Create custom blocks with simple inheritance. Rich logging and monitoring built-in.
|
96
97
|
|
97
98
|
|
@@ -121,7 +122,7 @@ uv pip install sdg-hub[examples]
|
|
121
122
|
|
122
123
|
## 🚀 Quick Start
|
123
124
|
|
124
|
-
###
|
125
|
+
### Core Concepts
|
125
126
|
|
126
127
|
**Blocks** are composable units that transform datasets - think of them as data processing Lego pieces. Each block performs a specific task: LLM chat, text parsing, evaluation, or transformation.
|
127
128
|
|
@@ -136,7 +137,7 @@ dataset → Block₁ → Block₂ → Block₃ → enriched_dataset
|
|
136
137
|
|
137
138
|
#### Flow Discovery
|
138
139
|
```python
|
139
|
-
from sdg_hub import FlowRegistry
|
140
|
+
from sdg_hub import FlowRegistry, Flow
|
140
141
|
|
141
142
|
# Auto-discover all available flows (no setup needed!)
|
142
143
|
FlowRegistry.discover_flows()
|
@@ -150,16 +151,20 @@ qa_flows = FlowRegistry.search_flows(tag="question-generation")
|
|
150
151
|
print(f"QA flows: {qa_flows}")
|
151
152
|
```
|
152
153
|
|
153
|
-
|
154
|
+
Each flow has a **unique, human-readable ID** automatically generated from its name. These IDs provide a convenient shorthand for referencing flows:
|
155
|
+
|
154
156
|
```python
|
155
|
-
|
156
|
-
|
157
|
+
# Every flow gets a deterministic ID
|
158
|
+
# Same flow name always generates the same ID
|
159
|
+
flow_id = "small-rock-799"
|
157
160
|
|
158
|
-
#
|
159
|
-
|
160
|
-
flow_path = FlowRegistry.get_flow_path(flow_name)
|
161
|
+
# Use ID to reference the flow
|
162
|
+
flow_path = FlowRegistry.get_flow_path(flow_id)
|
161
163
|
flow = Flow.from_yaml(flow_path)
|
164
|
+
```
|
162
165
|
|
166
|
+
#### Discovering and Configuring Models
|
167
|
+
```python
|
163
168
|
# Discover recommended models
|
164
169
|
default_model = flow.get_default_model()
|
165
170
|
recommendations = flow.get_model_recommendations()
|
@@ -171,21 +176,52 @@ flow.set_model_config(
|
|
171
176
|
api_base="http://localhost:8000/v1",
|
172
177
|
api_key="your_key",
|
173
178
|
)
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
'
|
186
|
-
'
|
179
|
+
```
|
180
|
+
#### Discover dataset requirements and create your dataset
|
181
|
+
```python
|
182
|
+
# First, discover what data the flow needs
|
183
|
+
# Get an empty dataset with the exact schema needed
|
184
|
+
schema_dataset = flow.get_dataset_schema() # Get empty dataset with correct schema
|
185
|
+
print(f"Required columns: {schema_dataset.column_names}")
|
186
|
+
print(f"Schema: {schema_dataset.features}")
|
187
|
+
|
188
|
+
# Option 1: Add data directly to the schema dataset
|
189
|
+
dataset = schema_dataset.add_item({
|
190
|
+
'document': 'Your document text here...',
|
191
|
+
'document_outline': '1. Topic A; 2. Topic B; 3. Topic C',
|
192
|
+
'domain': 'Computer Science',
|
193
|
+
'icl_document': 'Example document for in-context learning...',
|
194
|
+
'icl_query_1': 'Example question 1?',
|
195
|
+
'icl_response_1': 'Example answer 1',
|
196
|
+
'icl_query_2': 'Example question 2?',
|
197
|
+
'icl_response_2': 'Example answer 2',
|
198
|
+
'icl_query_3': 'Example question 3?',
|
199
|
+
'icl_response_3': 'Example answer 3'
|
187
200
|
})
|
188
201
|
|
202
|
+
# Option 2: Create your own dataset and validate the schema
|
203
|
+
my_dataset = Dataset.from_dict(my_data_dict)
|
204
|
+
if my_dataset.features == schema_dataset.features:
|
205
|
+
print("✅ Schema matches - ready to generate!")
|
206
|
+
dataset = my_dataset
|
207
|
+
else:
|
208
|
+
print("❌ Schema mismatch - check your columns")
|
209
|
+
|
210
|
+
# Option 3: Get raw requirements for detailed inspection
|
211
|
+
requirements = flow.get_dataset_requirements()
|
212
|
+
if requirements:
|
213
|
+
print(f"Required: {requirements.required_columns}")
|
214
|
+
print(f"Optional: {requirements.optional_columns}")
|
215
|
+
print(f"Min samples: {requirements.min_samples}")
|
216
|
+
```
|
217
|
+
|
218
|
+
#### Dry Run and Generate
|
219
|
+
```python
|
220
|
+
# Quick Testing with Dry Run
|
221
|
+
dry_result = flow.dry_run(dataset, sample_size=1)
|
222
|
+
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
223
|
+
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
224
|
+
|
189
225
|
# Generate high-quality QA pairs
|
190
226
|
result = flow.generate(dataset)
|
191
227
|
|
@@ -196,14 +232,6 @@ faithfulness_scores = result['faithfulness_judgment']
|
|
196
232
|
relevancy_scores = result['relevancy_score']
|
197
233
|
```
|
198
234
|
|
199
|
-
#### Quick Testing with Dry Run
|
200
|
-
```python
|
201
|
-
# Test the flow with a small sample first
|
202
|
-
dry_result = flow.dry_run(dataset, sample_size=1)
|
203
|
-
print(f"Dry run completed in {dry_result['execution_time_seconds']:.2f}s")
|
204
|
-
print(f"Output columns: {dry_result['final_dataset']['columns']}")
|
205
|
-
```
|
206
|
-
|
207
235
|
|
208
236
|
## 📄 License
|
209
237
|
|
@@ -1,10 +1,10 @@
|
|
1
1
|
sdg_hub/__init__.py,sha256=Tw-6R5a8_W1kJcTAsW3R9ltBDP1dy5-fe7Tvt3cSyCQ,550
|
2
|
-
sdg_hub/_version.py,sha256=
|
2
|
+
sdg_hub/_version.py,sha256=o3ZTescp-19Z9cvBGq9dQnbppljgzdUYUf98Nov0spY,704
|
3
3
|
sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sdg_hub/core/__init__.py,sha256=NwqB4fwhC29W50VW7QXZssLxx122YvgO9LHDLdgAnrI,496
|
5
5
|
sdg_hub/core/blocks/__init__.py,sha256=9sCkCvDQzJGSedaePVlEIpbNwrkBz_K500VW_6FLhuE,1601
|
6
6
|
sdg_hub/core/blocks/base.py,sha256=TrzUAkG7Tiquk0Z3SOFsb5mRnHd1IbHH6gFPVH1P7T8,10424
|
7
|
-
sdg_hub/core/blocks/registry.py,sha256=
|
7
|
+
sdg_hub/core/blocks/registry.py,sha256=FuEN_pnq-nSH1LguY3_oCubT6Kz3SuJjk3TcUpLT-lw,10695
|
8
8
|
sdg_hub/core/blocks/deprecated_blocks/__init__.py,sha256=RDu3MWFStDQko-TKkx8tGoB1UTatP_RSldZK43zHDvY,889
|
9
9
|
sdg_hub/core/blocks/deprecated_blocks/combine_columns.py,sha256=HCvpaYsAwgx1Dm0vIshcWsKoVsRT0KrmKp9j4oqtByc,2757
|
10
10
|
sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py,sha256=maCaaEs0EMMzt7L1xm7fAH3ylaFMHEkeC_dtOw3FrjU,2694
|
@@ -13,39 +13,44 @@ sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py,sha256=IenCskrPEv09h2uT
|
|
13
13
|
sdg_hub/core/blocks/deprecated_blocks/llmblock.py,sha256=34lzC43BODpMk5AwlWA1ctdYPmN7cA6WL5vMXaI0P0Y,20385
|
14
14
|
sdg_hub/core/blocks/deprecated_blocks/rename_columns.py,sha256=thp-mHtkRmUw_nYKpldy_mLWR2AvC5YUhbqDETM6-T0,2620
|
15
15
|
sdg_hub/core/blocks/deprecated_blocks/sample_populator.py,sha256=UdueMApxOmPWaxxMrw7b1v74fKJBfqqRATEBqgmVtNw,1737
|
16
|
-
sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=
|
16
|
+
sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=nWecsVsW8DvBcqAF_LOqXmW-5MQ28uN3d1y6wkSy38c,2960
|
17
17
|
sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py,sha256=44TQu-rK5isia-otMVB1zHd8D-wWmu3C8CI1NLtfY5s,2729
|
18
18
|
sdg_hub/core/blocks/evaluation/__init__.py,sha256=kFXee-vsVVdU2XtLio9qHgPx_a0zoB_rQr509EKBGJc,357
|
19
|
-
sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=
|
20
|
-
sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=
|
21
|
-
sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=
|
19
|
+
sdg_hub/core/blocks/evaluation/evaluate_faithfulness_block.py,sha256=vFi3YIxVPNnzgdenIeAl7yUb4OOUY_uUOXS-pWLsDmw,12223
|
20
|
+
sdg_hub/core/blocks/evaluation/evaluate_relevancy_block.py,sha256=NXT1lixR-JnOXNlBCbMjULcpu4kh2SthhwCWEobiBt0,12115
|
21
|
+
sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=LKoIHdxUuTVO24n_M9cAliEj56uEe2kQAecKTRz65zI,12465
|
22
22
|
sdg_hub/core/blocks/filtering/__init__.py,sha256=isxSVSvDqkMjG8dQSl3Q2M4g5c1t9fTjBSA21icf-yA,275
|
23
23
|
sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=H8Gif0q9Wc_d1TnVow8Zpsg7blJOFGN1EZmV6OPpkcg,5971
|
24
|
-
sdg_hub/core/blocks/llm/__init__.py,sha256=
|
25
|
-
sdg_hub/core/blocks/llm/client_manager.py,sha256=
|
26
|
-
sdg_hub/core/blocks/llm/config.py,sha256=
|
24
|
+
sdg_hub/core/blocks/llm/__init__.py,sha256=N6-Prgd4X85oWbMQzhYMrq7OX-NTJm57cghowK-val0,844
|
25
|
+
sdg_hub/core/blocks/llm/client_manager.py,sha256=PDf07t2s68WQaoU-LTRke6nQUDWDKwiV4ptAT7Cbn18,14047
|
26
|
+
sdg_hub/core/blocks/llm/config.py,sha256=gc4xp5D20MSlKMFEos0QAaKUwgbZpBtMGXmn6LsIk78,11289
|
27
27
|
sdg_hub/core/blocks/llm/error_handler.py,sha256=7T-019ZFB9qgZoX1ybIiXyaLjPzrF96qcKmUu6vmO6g,12178
|
28
|
-
sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=
|
28
|
+
sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=J-iDJTY-txuduFXx-NUhyohWpRnEaYIg4f0VsSgpjVw,22641
|
29
|
+
sdg_hub/core/blocks/llm/llm_chat_with_parsing_retry_block.py,sha256=H7LqYpEiFO1g2cxncAl4vkLhQxAYgGpV6cUyQTSG03k,27628
|
29
30
|
sdg_hub/core/blocks/llm/prompt_builder_block.py,sha256=fkJd718X1oYlMY1cjo_8WCO16Gl8Tm0bUPWR78E_uws,13935
|
30
|
-
sdg_hub/core/blocks/llm/text_parser_block.py,sha256=
|
31
|
+
sdg_hub/core/blocks/llm/text_parser_block.py,sha256=uYcof6cy1tgMG_mHpFBW4xhVXg40rNhTerM5EqAvcEo,14187
|
31
32
|
sdg_hub/core/blocks/transform/__init__.py,sha256=Y_3izPCtgnMbFK-gBMeLHZspSrNLgbGheAJXU57XfFw,746
|
32
33
|
sdg_hub/core/blocks/transform/duplicate_columns.py,sha256=SaP7rIF4ZFEFFa50aU2xGNIuddXaEZrKxdWfHjzFpVI,2833
|
33
|
-
sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=
|
34
|
+
sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=XC_a7Skbd3mu7f4ra8fGWPxMwqUMSjJkQ7Ag7vflwJA,8235
|
34
35
|
sdg_hub/core/blocks/transform/melt_columns.py,sha256=vaYa5Taq6GhNZYWFL4uPK3-SfN2BsKEm-wvjd2EYYoI,4382
|
35
36
|
sdg_hub/core/blocks/transform/rename_columns.py,sha256=qeB5L2utqDQnutUetH1VKZSqDiJSH_yUp5EFCV-XCVI,1998
|
36
37
|
sdg_hub/core/blocks/transform/text_concat.py,sha256=_-B__Hob1WwgwkILPIZvTnsDzuwtoX1hKviyzHlnnes,3149
|
37
38
|
sdg_hub/core/blocks/transform/uniform_col_val_setter.py,sha256=XnjiT29z3PzIPy8M-mmE2w-Miab6Ed5ahy32SaxTCTE,3263
|
38
39
|
sdg_hub/core/flow/__init__.py,sha256=N2NZGngvd7qpT5FI_knKukUFM0IkD9K5jdTi-gDeUI4,475
|
39
|
-
sdg_hub/core/flow/base.py,sha256=
|
40
|
-
sdg_hub/core/flow/
|
41
|
-
sdg_hub/core/flow/
|
42
|
-
sdg_hub/core/flow/
|
43
|
-
sdg_hub/core/flow/
|
44
|
-
sdg_hub/core/
|
45
|
-
sdg_hub/core/utils/
|
40
|
+
sdg_hub/core/flow/base.py,sha256=eneLS9GR21q9nK3M8qZzIyJ-OeFF2Lp6ZwzQjBVnbyk,50364
|
41
|
+
sdg_hub/core/flow/checkpointer.py,sha256=stm5ZtjjEiLk9ZkAAnoQQn5Y8Yl_d7qCsQLZTrCXR48,11867
|
42
|
+
sdg_hub/core/flow/metadata.py,sha256=h9jpvAzWsF5n4ztZMzwa9ZNgnzKTHmFWdn7YbyJLHCw,12977
|
43
|
+
sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d4,7558
|
44
|
+
sdg_hub/core/flow/registry.py,sha256=DzCqEEgwhvwnCBAGLogoMVdwXh4pCHrxOWqoxam7O8I,12162
|
45
|
+
sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
|
46
|
+
sdg_hub/core/utils/__init__.py,sha256=C2FzLn3dHprwGJDEgI4fyFS3aoCJR-9PhHsunxropJ8,351
|
47
|
+
sdg_hub/core/utils/datautils.py,sha256=vvZSNZ94vMQMh9Bs99X92UPwSNzyyYwO3V4w3O3QYoA,2801
|
46
48
|
sdg_hub/core/utils/error_handling.py,sha256=yku8cGj_nKCyXDsnb-mHCpgukkkAMucJ4iAUrIzqysc,5510
|
49
|
+
sdg_hub/core/utils/flow_id_words.yaml,sha256=5QHpQdP7zwahRuooyAlJIwBY7WcDR7vtbJXxVJqujbg,2317
|
50
|
+
sdg_hub/core/utils/flow_identifier.py,sha256=aAHfK_G9AwEtMglLRMdMpi_AI1dciub5UqBGm4yb2HE,2841
|
47
51
|
sdg_hub/core/utils/logger_config.py,sha256=MPYdpyNXh_pxFUOAvSCHa98LGjxjaLXoUoqWekqTG4s,422
|
48
52
|
sdg_hub/core/utils/path_resolution.py,sha256=yWof4kGNpQ5dKcrVHg0h9KfOKLZ6ROjdfsLAZsQT5rM,2000
|
53
|
+
sdg_hub/core/utils/yaml_utils.py,sha256=tShCd-FFkp0xlKnLe7dXsMOR4AvT9d2qRUmu4ZnPSEY,1458
|
49
54
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
55
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
56
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/atomic_facts.yaml,sha256=xgUNY793y4lcpdtuWm5Ah1CmbU2gvvPQCpZMMa6kPXU,2447
|
@@ -54,10 +59,10 @@ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/ev
|
|
54
59
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml,sha256=zwzklXup6khRkR88avgrJTcjaMcV1wnbeYaML5oPuNs,1767
|
55
60
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml,sha256=cA8igo7jMrRXaWW6k0of6KOp7YnxLtPj0fP4DbrmZNQ,3647
|
56
61
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml,sha256=fcMV7LaCFZo4D29nwhGJXqFFuZMYVLo9XYjv8zcU6zs,364
|
57
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=
|
62
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=oyrLRjEnmioMa_G_sd9yQK_nBt4arwWV5fvKgzYE2ds,6090
|
58
63
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml,sha256=yX8aLY8dJSDML9ZJhnj9RzPbN8tH2xfcM4Gc6xZuwqQ,2596
|
59
|
-
sdg_hub-0.2.
|
60
|
-
sdg_hub-0.2.
|
61
|
-
sdg_hub-0.2.
|
62
|
-
sdg_hub-0.2.
|
63
|
-
sdg_hub-0.2.
|
64
|
+
sdg_hub-0.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
65
|
+
sdg_hub-0.2.2.dist-info/METADATA,sha256=OHIiUh4AqSHVW-asGQdUp67TXL_dCRV7NVC0E14IwM0,9647
|
66
|
+
sdg_hub-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
67
|
+
sdg_hub-0.2.2.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
|
68
|
+
sdg_hub-0.2.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|