sdg-hub 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +2 -2
- sdg_hub/core/blocks/__init__.py +0 -22
- sdg_hub/core/blocks/transform/rename_columns.py +19 -0
- sdg_hub/core/flow/base.py +146 -81
- sdg_hub/core/utils/__init__.py +11 -3
- sdg_hub/core/utils/flow_metrics.py +116 -0
- sdg_hub/core/utils/time_estimator.py +344 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +5 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +5 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +5 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +6 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +16 -10
- {sdg_hub-0.4.1.dist-info → sdg_hub-0.5.0.dist-info}/METADATA +2 -2
- {sdg_hub-0.4.1.dist-info → sdg_hub-0.5.0.dist-info}/RECORD +17 -27
- sdg_hub/core/blocks/deprecated_blocks/__init__.py +0 -29
- sdg_hub/core/blocks/deprecated_blocks/combine_columns.py +0 -93
- sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py +0 -88
- sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py +0 -103
- sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py +0 -94
- sdg_hub/core/blocks/deprecated_blocks/llmblock.py +0 -479
- sdg_hub/core/blocks/deprecated_blocks/rename_columns.py +0 -88
- sdg_hub/core/blocks/deprecated_blocks/sample_populator.py +0 -58
- sdg_hub/core/blocks/deprecated_blocks/selector.py +0 -97
- sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py +0 -88
- sdg_hub/core/flow/migration.py +0 -198
- {sdg_hub-0.4.1.dist-info → sdg_hub-0.5.0.dist-info}/WHEEL +0 -0
- {sdg_hub-0.4.1.dist-info → sdg_hub-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.4.1.dist-info → sdg_hub-0.5.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,344 @@
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
2
|
+
"""Time estimation utility for predicting full dataset execution time from dry_run results."""
|
3
|
+
|
4
|
+
# Standard
|
5
|
+
from typing import Dict, Optional
|
6
|
+
import math
|
7
|
+
|
8
|
+
# Default max concurrent requests used during dry runs
|
9
|
+
DRY_RUN_MAX_CONCURRENT = 100
|
10
|
+
|
11
|
+
# Conservative estimation factor (20% buffer for API variability, network latency, etc.)
|
12
|
+
ESTIMATION_BUFFER_FACTOR = 1.2
|
13
|
+
|
14
|
+
|
15
|
+
def is_llm_using_block(block_info: Dict) -> bool:
    """Detect whether a block issues LLM API calls.

    A block counts as LLM-using when its type name matches a known LLM or
    evaluation/verification block type, or when its recorded parameters
    include model/API settings. Used to compute request amplification for
    LLM blocks.

    Parameters
    ----------
    block_info : Dict
        Block information from dry_run results containing ``block_type``
        and ``parameters_used``.

    Returns
    -------
    bool
        True if the block uses LLMs, False otherwise.

    Examples
    --------
    >>> block = {"block_type": "LLMChatBlock", "parameters_used": {"model": "gpt-4"}}
    >>> is_llm_using_block(block)
    True
    """
    type_markers = ("LLMChatBlock", "Evaluate", "Verify")
    model_param_keys = ("model", "api_base", "api_key")

    # Type-name match covers direct LLM blocks plus evaluation/verification blocks.
    type_name = block_info.get("block_type", "")
    if any(marker in type_name for marker in type_markers):
        return True

    # Otherwise, the presence of model/API parameters marks it as LLM-using.
    recorded_params = block_info.get("parameters_used", {})
    return any(key in recorded_params for key in model_param_keys)
|
49
|
+
|
50
|
+
|
51
|
+
def calculate_block_throughput(
    block_1: Dict, block_2: Dict, samples_1: int, samples_2: int
) -> Dict:
    """Derive throughput, amplification, and startup overhead from two dry runs.

    Fits the linear model ``time = startup_overhead + requests / throughput``
    to the two measurements when they scale cleanly (more requests took more
    time); otherwise falls back to a simple requests-per-second estimate with
    a small assumed startup cost.

    Parameters
    ----------
    block_1 : Dict
        Block execution info from first dry run.
    block_2 : Dict
        Block execution info from second dry run.
    samples_1 : int
        Number of samples in first dry run.
    samples_2 : int
        Number of samples in second dry run.

    Returns
    -------
    Dict
        Dictionary with keys:
        - ``throughput``: float, requests per second
        - ``amplification``: float, average requests per input sample
        - ``startup_overhead``: float, fixed startup time in seconds

    Raises
    ------
    ValueError
        If no valid throughput can be derived from the measurements.

    Examples
    --------
    >>> block1 = {"execution_time_seconds": 1.0, "input_rows": 1, "block_name": "test"}
    >>> block2 = {"execution_time_seconds": 2.0, "input_rows": 5, "block_name": "test"}
    >>> result = calculate_block_throughput(block1, block2, 1, 5)
    >>> assert result["throughput"] > 0
    """
    elapsed_a = block_1.get("execution_time_seconds", 0)
    elapsed_b = block_2.get("execution_time_seconds", 0)
    reqs_a = block_1.get("input_rows", 0)
    reqs_b = block_2.get("input_rows", 0)

    # Requests issued per input sample, averaged over both runs.
    per_sample_a = reqs_a / samples_1 if samples_1 > 0 else 1
    per_sample_b = reqs_b / samples_2 if samples_2 > 0 else 1
    amplification = (per_sample_a + per_sample_b) / 2

    if reqs_b > reqs_a and elapsed_b > elapsed_a:
        # Two usable points: fit time = overhead + requests * slope.
        slope = (elapsed_b - elapsed_a) / (reqs_b - reqs_a)
        # Throughput is the inverse of the marginal time per request.
        throughput = 1.0 / slope if slope > 0 else 0
        # The y-intercept of the fitted line is the fixed startup cost.
        overhead = max(0, elapsed_a - (reqs_a * slope))
    else:
        # Degenerate data for linear scaling: take the better of the two raw
        # rates and assume a 10% startup cost, capped at 2 seconds.
        throughput = max(
            reqs_a / elapsed_a if elapsed_a > 0 else 0,
            reqs_b / elapsed_b if elapsed_b > 0 else 0,
        )
        overhead = min(2.0, elapsed_a * 0.1)

    # No valid measurement at all -> refuse to guess.
    if throughput == 0:
        raise ValueError(
            f"Cannot calculate throughput for block '{block_1.get('block_name', 'unknown')}': "
            f"No valid measurements from dry runs (time_1={elapsed_a}, time_2={elapsed_b}, "
            f"requests_1={reqs_a}, requests_2={reqs_b})"
        )

    return {
        "throughput": throughput,
        "amplification": amplification,
        "startup_overhead": overhead,
    }
|
134
|
+
|
135
|
+
|
136
|
+
def calculate_time_with_pipeline(
    num_requests: float,
    throughput: float,
    startup_overhead: float,
    max_concurrent: int = DRY_RUN_MAX_CONCURRENT,
) -> float:
    """Calculate time considering pipeline behavior and max concurrent limit.

    Models execution time as ``startup_overhead + num_requests / effective_throughput``
    where effective throughput gains at most a 10% pipelining bonus that grows
    logarithmically with the concurrency limit (diminishing returns).

    Parameters
    ----------
    num_requests : float
        Total number of requests to process.
    throughput : float
        Base throughput in requests per second.
    startup_overhead : float
        Fixed startup time overhead in seconds.
    max_concurrent : int, optional
        Maximum number of concurrent requests, by default ``DRY_RUN_MAX_CONCURRENT``
        (100). None or non-positive values are clamped to 1 (sequential).

    Returns
    -------
    float
        Estimated total execution time in seconds; 0.0 when there is nothing
        to process.

    Examples
    --------
    >>> time = calculate_time_with_pipeline(1000, 10.0, 0.5, 50)
    >>> assert time > 0
    """
    if num_requests <= 0:
        # Return a float here: the previous int 0 contradicted the declared
        # ``-> float`` return type.
        return 0.0

    # Clamp invalid concurrency values to sequential execution; this also
    # guards the math.log call below against non-positive arguments.
    if max_concurrent is None or max_concurrent <= 0:
        max_concurrent = 1

    if max_concurrent == 1:
        # Sequential execution - no pipelining benefit.
        effective_throughput = throughput
    else:
        # Concurrent execution: logarithmic pipelining bonus (diminishing
        # returns), conservatively capped at 10% above measured throughput.
        pipelining_factor = 1.0 + (0.1 * math.log(max_concurrent) / math.log(100))
        pipelining_factor = min(pipelining_factor, 1.1)
        effective_throughput = throughput * pipelining_factor

    # Fixed startup cost plus steady-state processing time.
    return startup_overhead + (num_requests / effective_throughput)
|
192
|
+
|
193
|
+
|
194
|
+
def estimate_execution_time(
    dry_run_1: Dict,
    dry_run_2: Optional[Dict] = None,
    total_dataset_size: Optional[int] = None,
    max_concurrency: Optional[int] = None,
) -> Dict:
    """Estimate execution time based on dry run results.

    Estimates the total execution time for a full dataset based on one or two
    dry runs with smaller sample sizes. For async blocks (with two dry runs),
    calculates throughput and concurrency benefits. For sync blocks (single
    dry run), performs simple linear scaling.

    The estimates include a conservative buffer (``ESTIMATION_BUFFER_FACTOR``,
    20%) to account for API variability, network latency, and other
    real-world factors.

    Parameters
    ----------
    dry_run_1 : Dict
        Results from first dry run, must contain 'sample_size' and 'execution_time_seconds'.
    dry_run_2 : Optional[Dict], optional
        Results from second dry run for async estimation, by default None.
    total_dataset_size : Optional[int], optional
        Size of full dataset to estimate for. If None, uses original_dataset_size from dry_run_1.
    max_concurrency : Optional[int], optional
        Maximum concurrent requests allowed, by default ``DRY_RUN_MAX_CONCURRENT`` (100).

    Returns
    -------
    Dict
        Estimation results containing:
        - estimated_time_seconds: float, estimated time with current configuration (includes buffer)
        - total_estimated_requests: int, total LLM requests (0 for sync blocks)
        - block_estimates: list, per-block estimates (for async blocks)
        - note: str, additional information about the estimation

    Raises
    ------
    ValueError
        If the dry run reports a non-positive sample size with no per-block
        details, or (via ``calculate_block_throughput``) if throughput cannot
        be derived for an LLM block.

    Examples
    --------
    >>> dry_run = {"sample_size": 2, "execution_time_seconds": 10.0}
    >>> result = estimate_execution_time(dry_run, total_dataset_size=100)
    >>> assert result["estimated_time_seconds"] > 0
    >>>
    >>> # With two dry runs for async estimation
    >>> dry_run_1 = {"sample_size": 1, "execution_time_seconds": 5.0, "blocks_executed": [...]}
    >>> dry_run_2 = {"sample_size": 5, "execution_time_seconds": 20.0, "blocks_executed": [...]}
    >>> result = estimate_execution_time(dry_run_1, dry_run_2, total_dataset_size=1000)
    >>> assert result["estimated_time_seconds"] > 0
    """
    # Set defaults
    if max_concurrency is None:
        max_concurrency = DRY_RUN_MAX_CONCURRENT

    if total_dataset_size is None:
        total_dataset_size = dry_run_1.get(
            "original_dataset_size", dry_run_1["sample_size"]
        )

    samples_1 = dry_run_1["sample_size"]

    # If only one dry run, do simple scaling (synchronous execution).
    if dry_run_2 is None:
        blocks_executed = dry_run_1.get("blocks_executed", [])
        if not blocks_executed:
            # Fallback to simple scaling if no block details available.
            # Guard against a zero/negative sample size, which previously
            # crashed with ZeroDivisionError.
            if samples_1 <= 0:
                raise ValueError(
                    "Cannot estimate execution time: dry run 'sample_size' "
                    f"must be positive, got {samples_1}"
                )
            total_time = dry_run_1["execution_time_seconds"]
            simple_estimate = (total_time / samples_1) * total_dataset_size
            # Apply conservative buffer
            simple_estimate = simple_estimate * ESTIMATION_BUFFER_FACTOR
            return {
                "estimated_time_seconds": simple_estimate,
                "total_estimated_requests": 0,
                "note": "Synchronous execution - linear scaling from dry run",
            }

        # Calculate time for each block and sum them.
        total_estimated_time = 0.0
        for block in blocks_executed:
            block_time = block.get("execution_time_seconds", 0)
            input_rows = block.get("input_rows", samples_1)

            # Scale each block linearly by its per-row cost.
            if input_rows > 0:
                time_per_row = block_time / input_rows
                total_estimated_time += time_per_row * total_dataset_size

        # Apply conservative buffer
        total_estimated_time = total_estimated_time * ESTIMATION_BUFFER_FACTOR
        return {
            "estimated_time_seconds": total_estimated_time,
            "total_estimated_requests": 0,
            "note": "Synchronous execution - no concurrency",
        }

    # Two dry runs: analyze async/LLM blocks. Only computed here because the
    # single-dry-run path above never needs samples_2 (previously this was
    # computed unconditionally with a magic default of 5).
    samples_2 = dry_run_2.get("sample_size", 5)

    block_estimates = []
    total_time = 0
    total_requests = 0

    # Pair blocks positionally between the two dry runs.
    for i, block_1 in enumerate(dry_run_1.get("blocks_executed", [])):
        if i >= len(dry_run_2.get("blocks_executed", [])):
            break

        block_2 = dry_run_2["blocks_executed"][i]

        # Only LLM-using blocks contribute to the async estimate.
        if not is_llm_using_block(block_1):
            continue

        # Fit throughput / amplification / overhead from the two runs.
        analysis = calculate_block_throughput(block_1, block_2, samples_1, samples_2)

        # Requests expected for the full dataset.
        estimated_requests = total_dataset_size * analysis["amplification"]

        # Time under the pipelining model with the configured concurrency.
        block_time = calculate_time_with_pipeline(
            estimated_requests,
            analysis["throughput"],
            analysis["startup_overhead"],
            max_concurrency,
        )

        total_time += block_time
        total_requests += estimated_requests

        block_estimates.append(
            {
                "block": block_1["block_name"],
                "estimated_requests": estimated_requests,
                "throughput": analysis["throughput"],
                "estimated_time": block_time,
                "amplification": analysis["amplification"],
                "startup_overhead": analysis["startup_overhead"],
            }
        )

    # Apply conservative buffer to account for API variability, network issues, etc.
    total_time = total_time * ESTIMATION_BUFFER_FACTOR

    return {
        "estimated_time_seconds": total_time,
        "total_estimated_requests": int(total_requests),
        "block_estimates": block_estimates,
    }
|
@@ -77,9 +77,13 @@ blocks:
|
|
77
77
|
- ''
|
78
78
|
- block_type: RenameColumnsBlock
|
79
79
|
block_config:
|
80
|
-
block_name:
|
80
|
+
block_name: rename_to_raw_document_column
|
81
81
|
input_cols:
|
82
82
|
document: raw_document
|
83
|
+
- block_type: RenameColumnsBlock
|
84
|
+
block_config:
|
85
|
+
block_name: rename_to_document_column
|
86
|
+
input_cols:
|
83
87
|
summary: document
|
84
88
|
- block_type: PromptBuilderBlock
|
85
89
|
block_config:
|
@@ -79,9 +79,13 @@ blocks:
|
|
79
79
|
- ''
|
80
80
|
- block_type: RenameColumnsBlock
|
81
81
|
block_config:
|
82
|
-
block_name:
|
82
|
+
block_name: rename_to_raw_document_column
|
83
83
|
input_cols:
|
84
84
|
document: raw_document
|
85
|
+
- block_type: RenameColumnsBlock
|
86
|
+
block_config:
|
87
|
+
block_name: rename_to_document_column
|
88
|
+
input_cols:
|
85
89
|
summary: document
|
86
90
|
- block_type: PromptBuilderBlock
|
87
91
|
block_config:
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml
CHANGED
@@ -72,9 +72,13 @@ blocks:
|
|
72
72
|
parsing_pattern: '(?:^|\n)\s*\d+\.\s+(.*?)(?=\n\s*\d+\.\s+|\Z)'
|
73
73
|
- block_type: RenameColumnsBlock
|
74
74
|
block_config:
|
75
|
-
block_name:
|
75
|
+
block_name: rename_to_raw_document_column
|
76
76
|
input_cols:
|
77
77
|
document: raw_document
|
78
|
+
- block_type: RenameColumnsBlock
|
79
|
+
block_config:
|
80
|
+
block_name: rename_to_document_column
|
81
|
+
input_cols:
|
78
82
|
atomic_facts: document
|
79
83
|
- block_type: PromptBuilderBlock
|
80
84
|
block_config:
|
@@ -134,10 +134,15 @@ blocks:
|
|
134
134
|
input_cols: [summary_detailed, summary_extractive, summary_atomic_facts, base_document]
|
135
135
|
output_cols: [summary, dataset_type]
|
136
136
|
|
137
|
+
- block_type: RenameColumnsBlock
|
138
|
+
block_config:
|
139
|
+
block_name: rename_to_raw_document_column
|
140
|
+
input_cols: {document: raw_document}
|
141
|
+
|
137
142
|
- block_type: RenameColumnsBlock
|
138
143
|
block_config:
|
139
144
|
block_name: rename_to_document_column
|
140
|
-
input_cols: {
|
145
|
+
input_cols: {summary: document}
|
141
146
|
|
142
147
|
- block_type: PromptBuilderBlock
|
143
148
|
block_config:
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml
CHANGED
@@ -19,7 +19,7 @@ metadata:
|
|
19
19
|
- "japanese"
|
20
20
|
|
21
21
|
license: "Apache-2.0"
|
22
|
-
|
22
|
+
|
23
23
|
dataset_requirements:
|
24
24
|
required_columns:
|
25
25
|
- "document"
|
@@ -54,17 +54,19 @@ blocks:
|
|
54
54
|
output_cols: raw_summary_detailed
|
55
55
|
max_tokens: 2048
|
56
56
|
async_mode: true
|
57
|
+
# n: 2
|
57
58
|
|
58
59
|
- block_type: LLMParserBlock
|
59
60
|
block_config:
|
60
|
-
block_name:
|
61
|
+
block_name: detailed_summary
|
61
62
|
input_cols: raw_summary_detailed
|
62
63
|
extract_content: true
|
64
|
+
# extract_reasoning_content: true
|
63
65
|
|
64
66
|
- block_type: TextParserBlock
|
65
67
|
block_config:
|
66
68
|
block_name: parse_detailed_summary
|
67
|
-
input_cols:
|
69
|
+
input_cols: detailed_summary_content
|
68
70
|
output_cols: summary_detailed
|
69
71
|
start_tags: [""]
|
70
72
|
end_tags: [""]
|
@@ -86,14 +88,14 @@ blocks:
|
|
86
88
|
|
87
89
|
- block_type: LLMParserBlock
|
88
90
|
block_config:
|
89
|
-
block_name:
|
91
|
+
block_name: atomic_facts
|
90
92
|
input_cols: raw_atomic_facts
|
91
93
|
extract_content: true
|
92
94
|
|
93
95
|
- block_type: TextParserBlock
|
94
96
|
block_config:
|
95
97
|
block_name: parse_atomic_facts
|
96
|
-
input_cols:
|
98
|
+
input_cols: atomic_facts_content
|
97
99
|
output_cols: summary_atomic_facts
|
98
100
|
start_tags: [""]
|
99
101
|
end_tags: [""]
|
@@ -115,14 +117,14 @@ blocks:
|
|
115
117
|
|
116
118
|
- block_type: LLMParserBlock
|
117
119
|
block_config:
|
118
|
-
block_name:
|
120
|
+
block_name: extractive_summary
|
119
121
|
input_cols: raw_summary_extractive
|
120
122
|
extract_content: true
|
121
123
|
|
122
124
|
- block_type: TextParserBlock
|
123
125
|
block_config:
|
124
126
|
block_name: parse_extractive_summary
|
125
|
-
input_cols:
|
127
|
+
input_cols: extractive_summary_content
|
126
128
|
output_cols: summary_extractive
|
127
129
|
start_tags: [""]
|
128
130
|
end_tags: [""]
|
@@ -133,10 +135,14 @@ blocks:
|
|
133
135
|
input_cols: [summary_detailed, summary_extractive, summary_atomic_facts, base_document]
|
134
136
|
output_cols: [summary, dataset_type]
|
135
137
|
|
138
|
+
- block_type: RenameColumnsBlock
|
139
|
+
block_config:
|
140
|
+
block_name: rename_to_raw_document_column
|
141
|
+
input_cols: {document: raw_document}
|
136
142
|
- block_type: RenameColumnsBlock
|
137
143
|
block_config:
|
138
144
|
block_name: rename_to_document_column
|
139
|
-
input_cols: {
|
145
|
+
input_cols: {summary: document}
|
140
146
|
|
141
147
|
- block_type: PromptBuilderBlock
|
142
148
|
block_config:
|
@@ -156,14 +162,14 @@ blocks:
|
|
156
162
|
|
157
163
|
- block_type: LLMParserBlock
|
158
164
|
block_config:
|
159
|
-
block_name:
|
165
|
+
block_name: get_knowledge_generation
|
160
166
|
input_cols: raw_knowledge_generation
|
161
167
|
extract_content: true
|
162
168
|
|
163
169
|
- block_type: TextParserBlock
|
164
170
|
block_config:
|
165
171
|
block_name: parse_knowledge_generation
|
166
|
-
input_cols:
|
172
|
+
input_cols: get_knowledge_generation_content
|
167
173
|
output_cols: [question, response]
|
168
174
|
parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
|
169
175
|
parser_cleanup_tags: ["[END]"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: sdg_hub
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: Synthetic Data Generation
|
5
5
|
Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
|
6
6
|
License: Apache-2.0
|
@@ -23,7 +23,7 @@ Requires-Python: >=3.10
|
|
23
23
|
Description-Content-Type: text/markdown
|
24
24
|
License-File: LICENSE
|
25
25
|
Requires-Dist: click<9.0.0,>=8.1.7
|
26
|
-
Requires-Dist: datasets
|
26
|
+
Requires-Dist: datasets>=4.0.0
|
27
27
|
Requires-Dist: httpx<1.0.0,>=0.25.0
|
28
28
|
Requires-Dist: jinja2
|
29
29
|
Requires-Dist: litellm<1.75.0,>=1.73.0
|
@@ -1,20 +1,10 @@
|
|
1
1
|
sdg_hub/__init__.py,sha256=TlkZT40-70urdcWLqv3kupaJj8s-SVgd2QyvlSFwb4A,510
|
2
|
-
sdg_hub/_version.py,sha256=
|
2
|
+
sdg_hub/_version.py,sha256=fvHpBU3KZKRinkriKdtAt3crenOyysELF-M9y3ozg3U,704
|
3
3
|
sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sdg_hub/core/__init__.py,sha256=e3BoejbqjYhasf9t__L4qE52lkD9EBjx4o--2kqKdro,460
|
5
|
-
sdg_hub/core/blocks/__init__.py,sha256=
|
5
|
+
sdg_hub/core/blocks/__init__.py,sha256=8Rn1SglH8V3jGmTD_cG-h7qk9ktAab2eaBdyk7RN_hY,865
|
6
6
|
sdg_hub/core/blocks/base.py,sha256=-SOdBpJwtRTMsrmCEuLjUBQMRCo_PLYlHEBRrz8sF9g,13031
|
7
7
|
sdg_hub/core/blocks/registry.py,sha256=FuEN_pnq-nSH1LguY3_oCubT6Kz3SuJjk3TcUpLT-lw,10695
|
8
|
-
sdg_hub/core/blocks/deprecated_blocks/__init__.py,sha256=RDu3MWFStDQko-TKkx8tGoB1UTatP_RSldZK43zHDvY,889
|
9
|
-
sdg_hub/core/blocks/deprecated_blocks/combine_columns.py,sha256=HCvpaYsAwgx1Dm0vIshcWsKoVsRT0KrmKp9j4oqtByc,2757
|
10
|
-
sdg_hub/core/blocks/deprecated_blocks/duplicate_columns.py,sha256=maCaaEs0EMMzt7L1xm7fAH3ylaFMHEkeC_dtOw3FrjU,2694
|
11
|
-
sdg_hub/core/blocks/deprecated_blocks/filter_by_value.py,sha256=-fuuMKj2g2MrijMBTd0PWtYBbf9anQ2UkYXHigCxxJI,3328
|
12
|
-
sdg_hub/core/blocks/deprecated_blocks/flatten_columns.py,sha256=IenCskrPEv09h2uT6aZKCQzaxgA_3kAzOeJSd-R_-EA,2839
|
13
|
-
sdg_hub/core/blocks/deprecated_blocks/llmblock.py,sha256=34lzC43BODpMk5AwlWA1ctdYPmN7cA6WL5vMXaI0P0Y,20385
|
14
|
-
sdg_hub/core/blocks/deprecated_blocks/rename_columns.py,sha256=thp-mHtkRmUw_nYKpldy_mLWR2AvC5YUhbqDETM6-T0,2620
|
15
|
-
sdg_hub/core/blocks/deprecated_blocks/sample_populator.py,sha256=UdueMApxOmPWaxxMrw7b1v74fKJBfqqRATEBqgmVtNw,1737
|
16
|
-
sdg_hub/core/blocks/deprecated_blocks/selector.py,sha256=nWecsVsW8DvBcqAF_LOqXmW-5MQ28uN3d1y6wkSy38c,2960
|
17
|
-
sdg_hub/core/blocks/deprecated_blocks/set_to_majority_value.py,sha256=44TQu-rK5isia-otMVB1zHd8D-wWmu3C8CI1NLtfY5s,2729
|
18
8
|
sdg_hub/core/blocks/filtering/__init__.py,sha256=isxSVSvDqkMjG8dQSl3Q2M4g5c1t9fTjBSA21icf-yA,275
|
19
9
|
sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=2Z9j_CiiTn5mHZ9gfXU-itLXDmeXSh0UI0x1x7j-LQ0,6001
|
20
10
|
sdg_hub/core/blocks/llm/__init__.py,sha256=AyS0dd3pkPPXH5a9aj4mT5HsKjX2vjXfkmQc6rkFV4A,795
|
@@ -29,24 +19,24 @@ sdg_hub/core/blocks/transform/duplicate_columns.py,sha256=SaP7rIF4ZFEFFa50aU2xGN
|
|
29
19
|
sdg_hub/core/blocks/transform/index_based_mapper.py,sha256=XC_a7Skbd3mu7f4ra8fGWPxMwqUMSjJkQ7Ag7vflwJA,8235
|
30
20
|
sdg_hub/core/blocks/transform/json_structure_block.py,sha256=hm-0M0NAyUREgJRPyV1u-laorgX6MZ1o17E9rNBhN78,5010
|
31
21
|
sdg_hub/core/blocks/transform/melt_columns.py,sha256=vaYa5Taq6GhNZYWFL4uPK3-SfN2BsKEm-wvjd2EYYoI,4382
|
32
|
-
sdg_hub/core/blocks/transform/rename_columns.py,sha256=
|
22
|
+
sdg_hub/core/blocks/transform/rename_columns.py,sha256=W2hcDSJY6L73ZpElUhOML2sGLM9Y-v0gSo3xEF1LXDc,2749
|
33
23
|
sdg_hub/core/blocks/transform/text_concat.py,sha256=_-B__Hob1WwgwkILPIZvTnsDzuwtoX1hKviyzHlnnes,3149
|
34
24
|
sdg_hub/core/blocks/transform/uniform_col_val_setter.py,sha256=XnjiT29z3PzIPy8M-mmE2w-Miab6Ed5ahy32SaxTCTE,3263
|
35
25
|
sdg_hub/core/flow/__init__.py,sha256=0_m_htuZfPxk8xQ9IKfp0Pz-JRE4O7lYMUFrKyLNoLA,409
|
36
|
-
sdg_hub/core/flow/base.py,sha256=
|
26
|
+
sdg_hub/core/flow/base.py,sha256=64YJJujNRaSIbT1YKn9nAxij_hdJ9xRVH_uiUY1IUcI,55788
|
37
27
|
sdg_hub/core/flow/checkpointer.py,sha256=stm5ZtjjEiLk9ZkAAnoQQn5Y8Yl_d7qCsQLZTrCXR48,11867
|
38
28
|
sdg_hub/core/flow/metadata.py,sha256=cFrpJjWOaK87aCuRFyC3Pdf83oYU93mrmZEMdUnhsN8,10540
|
39
|
-
sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d4,7558
|
40
29
|
sdg_hub/core/flow/registry.py,sha256=N6KfX-L7QRkooznIFxDuhRZYuDA5g3N5zC-KRm2jVhk,12109
|
41
30
|
sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
|
42
|
-
sdg_hub/core/utils/__init__.py,sha256=
|
31
|
+
sdg_hub/core/utils/__init__.py,sha256=KcT56JhobC5sBg0MKEMn5hc4OyKa9_Vnn45Mt_kS4jQ,610
|
43
32
|
sdg_hub/core/utils/datautils.py,sha256=__HkUe1DxcJVHKrFX68z_hDXwxJygBlJDfjJLnj7rHc,4230
|
44
33
|
sdg_hub/core/utils/error_handling.py,sha256=yku8cGj_nKCyXDsnb-mHCpgukkkAMucJ4iAUrIzqysc,5510
|
45
34
|
sdg_hub/core/utils/flow_id_words.yaml,sha256=5QHpQdP7zwahRuooyAlJIwBY7WcDR7vtbJXxVJqujbg,2317
|
46
35
|
sdg_hub/core/utils/flow_identifier.py,sha256=aAHfK_G9AwEtMglLRMdMpi_AI1dciub5UqBGm4yb2HE,2841
|
47
|
-
sdg_hub/core/utils/flow_metrics.py,sha256=
|
36
|
+
sdg_hub/core/utils/flow_metrics.py,sha256=3G-xbfr-rFA578wV4KUbQePTMVGZHr9-rXvyYL4Kt2Q,12604
|
48
37
|
sdg_hub/core/utils/logger_config.py,sha256=6_cnsIHtSAdq1iTTZ7Q7nAJ1dmldlxSZ0AB49yLiQ20,2034
|
49
38
|
sdg_hub/core/utils/path_resolution.py,sha256=yWof4kGNpQ5dKcrVHg0h9KfOKLZ6ROjdfsLAZsQT5rM,2000
|
39
|
+
sdg_hub/core/utils/time_estimator.py,sha256=rM3_R-Ka5DEtvOtlJoA_5pXSyQ6tT6t4h6qh3_5BCZo,12639
|
50
40
|
sdg_hub/core/utils/yaml_utils.py,sha256=tShCd-FFkp0xlKnLe7dXsMOR4AvT9d2qRUmu4ZnPSEY,1458
|
51
41
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
42
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml,sha256=THRT3cY44KGI_69B2wqt2Q89EknnOSE7B4A_jdnxlIU,330
|
@@ -54,14 +44,14 @@ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/gener
|
|
54
44
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml,sha256=qHOgUNrQz2vjUjJiEHNGWxDDXwjJlP1kofTxeGgLyPI,1461
|
55
45
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
46
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml,sha256=Ik6gAml0O-jPq8jpXBAkURzYkQuFOnDZb4LDwjmfAiE,381
|
57
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=
|
47
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=cxNpPh60mcvzxfczMH8hw66Ql3S8O-cWCCDeauO736c,5649
|
58
48
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
59
49
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml,sha256=smPWVUZRCt58EagWDmJVmTBQj8qMcjpzh-Q3GSuFrz0,4413
|
60
50
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
61
51
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml,sha256=SeapWoOx3fhN5SvWYuHss_9prLE8xSkOic7JkbDHSR0,4081
|
62
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=
|
52
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=7dVc0_g7Ex5SfdX57pqtk9gmH_lC6Cdm3HC-lg8OiXQ,5817
|
63
53
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=
|
54
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=7X4N19TcyHUo7pNo3C6Zv3w6br7hjzEfgv06XUVDaQo,3330
|
65
55
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml,sha256=YKMX_CuvcThG_bdNCAIXdVBkMvB72I89RGq2ltSSgc8,3298
|
66
56
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
67
57
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -71,14 +61,14 @@ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/ev
|
|
71
61
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml,sha256=zwzklXup6khRkR88avgrJTcjaMcV1wnbeYaML5oPuNs,1767
|
72
62
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml,sha256=cA8igo7jMrRXaWW6k0of6KOp7YnxLtPj0fP4DbrmZNQ,3647
|
73
63
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml,sha256=fcMV7LaCFZo4D29nwhGJXqFFuZMYVLo9XYjv8zcU6zs,364
|
74
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=
|
64
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=km0ggcmFsZJGc2TfyYLkzPTrHGmcOB-jBAHInqySisk,9176
|
75
65
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml,sha256=yX8aLY8dJSDML9ZJhnj9RzPbN8tH2xfcM4Gc6xZuwqQ,2596
|
76
66
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
77
67
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
68
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml,sha256=OjPZaSCOSLxEWgW3pmNwF7mmLhGhFGTmKL_3rKdqeW4,2488
|
79
69
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml,sha256=nEy_RcotHGiiENrmUANpKkbIFsrARAeSwECrBeHi2so,391
|
80
70
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml,sha256=V90W0IeJQZTFThA8v0UOs3DtZbtU3BI9jkpChw1BULo,402
|
81
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml,sha256=
|
71
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml,sha256=Q6RusV-_HHMr5jlFNOP6UVuEf8d6btHENMOP3MnB3u0,9291
|
82
72
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml,sha256=96SQqXG7fmb-50SdX85sgVtrFcQ-oNKe_0BoQdZmY5g,2638
|
83
73
|
sdg_hub/flows/text_analysis/__init__.py,sha256=WStks4eM_KHNTVsHglcj8vFghmI0PH9P1hUrijBLbwc,125
|
84
74
|
sdg_hub/flows/text_analysis/structured_insights/__init__.py,sha256=_DT4NR05JD9CZoSWROPr2lC6se0VjSqQPZJJlEV79mk,274
|
@@ -87,8 +77,8 @@ sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml,sha256=Q_S
|
|
87
77
|
sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml,sha256=_nPPMdHnxag_lYbhYUjGJGo-CvRwWvwdGX7cQhdZ1S0,847
|
88
78
|
sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=BBV18SdvuVTAESjwkJ7V1jbb-cSTBvNl3SCycd0oEQ4,4934
|
89
79
|
sdg_hub/flows/text_analysis/structured_insights/summarize.yaml,sha256=WXwQak1pF8e1OwnOoI1EHu8QB6iUNW89rfkTdi1Oq54,687
|
90
|
-
sdg_hub-0.
|
91
|
-
sdg_hub-0.
|
92
|
-
sdg_hub-0.
|
93
|
-
sdg_hub-0.
|
94
|
-
sdg_hub-0.
|
80
|
+
sdg_hub-0.5.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
81
|
+
sdg_hub-0.5.0.dist-info/METADATA,sha256=z4tCCtWlTBzu5DF1K44RtWjIs7ZNL6__2Aae7I0EfxQ,9775
|
82
|
+
sdg_hub-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
83
|
+
sdg_hub-0.5.0.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
|
84
|
+
sdg_hub-0.5.0.dist-info/RECORD,,
|
@@ -1,29 +0,0 @@
|
|
1
|
-
# SPDX-License-Identifier: Apache-2.0
|
2
|
-
"""Deprecated blocks for backwards compatibility.
|
3
|
-
|
4
|
-
This module contains deprecated block implementations that are maintained
|
5
|
-
for backwards compatibility. These blocks should not be used in new code.
|
6
|
-
"""
|
7
|
-
|
8
|
-
# Local
|
9
|
-
from .combine_columns import CombineColumnsBlock
|
10
|
-
from .duplicate_columns import DuplicateColumns
|
11
|
-
from .filter_by_value import FilterByValueBlock
|
12
|
-
from .flatten_columns import FlattenColumnsBlock
|
13
|
-
from .llmblock import LLMBlock
|
14
|
-
from .rename_columns import RenameColumns
|
15
|
-
from .sample_populator import SamplePopulatorBlock
|
16
|
-
from .selector import SelectorBlock
|
17
|
-
from .set_to_majority_value import SetToMajorityValue
|
18
|
-
|
19
|
-
__all__ = [
|
20
|
-
"CombineColumnsBlock",
|
21
|
-
"DuplicateColumns",
|
22
|
-
"FilterByValueBlock",
|
23
|
-
"FlattenColumnsBlock",
|
24
|
-
"LLMBlock",
|
25
|
-
"RenameColumns",
|
26
|
-
"SamplePopulatorBlock",
|
27
|
-
"SelectorBlock",
|
28
|
-
"SetToMajorityValue",
|
29
|
-
]
|