sdg-hub 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/__init__.py +0 -2
- sdg_hub/_version.py +2 -2
- sdg_hub/core/__init__.py +1 -2
- sdg_hub/core/flow/__init__.py +3 -4
- sdg_hub/core/flow/base.py +143 -71
- sdg_hub/core/flow/metadata.py +1 -68
- sdg_hub/core/flow/registry.py +0 -1
- sdg_hub/core/utils/__init__.py +11 -3
- sdg_hub/core/utils/flow_metrics.py +116 -0
- sdg_hub/core/utils/time_estimator.py +344 -0
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml +0 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml +0 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml +0 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml +0 -1
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml +1 -2
- sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml +11 -10
- sdg_hub/flows/text_analysis/structured_insights/flow.yaml +0 -1
- {sdg_hub-0.4.0.dist-info → sdg_hub-0.4.2.dist-info}/METADATA +2 -1
- {sdg_hub-0.4.0.dist-info → sdg_hub-0.4.2.dist-info}/RECORD +22 -21
- {sdg_hub-0.4.0.dist-info → sdg_hub-0.4.2.dist-info}/WHEEL +0 -0
- {sdg_hub-0.4.0.dist-info → sdg_hub-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.4.0.dist-info → sdg_hub-0.4.2.dist-info}/top_level.txt +0 -0
sdg_hub/__init__.py
CHANGED
@@ -8,7 +8,6 @@ from .core import (
|
|
8
8
|
BlockRegistry,
|
9
9
|
Flow,
|
10
10
|
FlowMetadata,
|
11
|
-
FlowParameter,
|
12
11
|
FlowRegistry,
|
13
12
|
FlowValidator,
|
14
13
|
GenerateError,
|
@@ -23,7 +22,6 @@ __all__ = [
|
|
23
22
|
"FlowRegistry",
|
24
23
|
# Metadata and utilities
|
25
24
|
"FlowMetadata",
|
26
|
-
"FlowParameter",
|
27
25
|
"FlowValidator",
|
28
26
|
"GenerateError",
|
29
27
|
"resolve_path",
|
sdg_hub/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '0.4.
|
32
|
-
__version_tuple__ = version_tuple = (0, 4,
|
31
|
+
__version__ = version = '0.4.2'
|
32
|
+
__version_tuple__ = version_tuple = (0, 4, 2)
|
33
33
|
|
34
34
|
__commit_id__ = commit_id = None
|
sdg_hub/core/__init__.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
# Local
|
5
5
|
from .blocks import BaseBlock, BlockRegistry
|
6
|
-
from .flow import Flow, FlowMetadata,
|
6
|
+
from .flow import Flow, FlowMetadata, FlowRegistry, FlowValidator
|
7
7
|
from .utils import GenerateError, resolve_path
|
8
8
|
|
9
9
|
__all__ = [
|
@@ -14,7 +14,6 @@ __all__ = [
|
|
14
14
|
"Flow",
|
15
15
|
"FlowRegistry",
|
16
16
|
"FlowMetadata",
|
17
|
-
"FlowParameter",
|
18
17
|
"FlowValidator",
|
19
18
|
# Utils
|
20
19
|
"GenerateError",
|
sdg_hub/core/flow/__init__.py
CHANGED
@@ -1,20 +1,19 @@
|
|
1
1
|
# SPDX-License-Identifier: Apache-2.0
|
2
2
|
"""New flow implementation for SDG Hub.
|
3
3
|
|
4
|
-
This module provides a redesigned Flow class with metadata support
|
5
|
-
dual initialization modes
|
4
|
+
This module provides a redesigned Flow class with metadata support
|
5
|
+
and dual initialization modes.
|
6
6
|
"""
|
7
7
|
|
8
8
|
# Local
|
9
9
|
from .base import Flow
|
10
|
-
from .metadata import FlowMetadata
|
10
|
+
from .metadata import FlowMetadata
|
11
11
|
from .registry import FlowRegistry
|
12
12
|
from .validation import FlowValidator
|
13
13
|
|
14
14
|
__all__ = [
|
15
15
|
"Flow",
|
16
16
|
"FlowMetadata",
|
17
|
-
"FlowParameter",
|
18
17
|
"FlowRegistry",
|
19
18
|
"FlowValidator",
|
20
19
|
]
|
sdg_hub/core/flow/base.py
CHANGED
@@ -30,12 +30,17 @@ from ..blocks.base import BaseBlock
|
|
30
30
|
from ..blocks.registry import BlockRegistry
|
31
31
|
from ..utils.datautils import safe_concatenate_with_validation, validate_no_duplicates
|
32
32
|
from ..utils.error_handling import EmptyDatasetError, FlowValidationError
|
33
|
-
from ..utils.flow_metrics import
|
33
|
+
from ..utils.flow_metrics import (
|
34
|
+
display_metrics_summary,
|
35
|
+
display_time_estimation_summary,
|
36
|
+
save_metrics_to_json,
|
37
|
+
)
|
34
38
|
from ..utils.logger_config import setup_logger
|
35
39
|
from ..utils.path_resolution import resolve_path
|
40
|
+
from ..utils.time_estimator import estimate_execution_time
|
36
41
|
from ..utils.yaml_utils import save_flow_yaml
|
37
42
|
from .checkpointer import FlowCheckpointer
|
38
|
-
from .metadata import DatasetRequirements, FlowMetadata
|
43
|
+
from .metadata import DatasetRequirements, FlowMetadata
|
39
44
|
from .migration import FlowMigration
|
40
45
|
from .validation import FlowValidator
|
41
46
|
|
@@ -55,8 +60,6 @@ class Flow(BaseModel):
|
|
55
60
|
Ordered list of blocks to execute in the flow.
|
56
61
|
metadata : FlowMetadata
|
57
62
|
Flow metadata including name, version, author, etc.
|
58
|
-
parameters : Dict[str, FlowParameter]
|
59
|
-
Runtime parameters that can be overridden during execution.
|
60
63
|
"""
|
61
64
|
|
62
65
|
blocks: list[BaseBlock] = Field(
|
@@ -66,10 +69,6 @@ class Flow(BaseModel):
|
|
66
69
|
metadata: FlowMetadata = Field(
|
67
70
|
description="Flow metadata including name, version, author, etc."
|
68
71
|
)
|
69
|
-
parameters: dict[str, FlowParameter] = Field(
|
70
|
-
default_factory=dict,
|
71
|
-
description="Runtime parameters that can be overridden during execution",
|
72
|
-
)
|
73
72
|
|
74
73
|
model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
|
75
74
|
|
@@ -96,32 +95,6 @@ class Flow(BaseModel):
|
|
96
95
|
|
97
96
|
return v
|
98
97
|
|
99
|
-
@field_validator("parameters")
|
100
|
-
@classmethod
|
101
|
-
def validate_parameters(
|
102
|
-
cls, v: dict[str, FlowParameter]
|
103
|
-
) -> dict[str, FlowParameter]:
|
104
|
-
"""Validate parameter names and ensure they are FlowParameter instances."""
|
105
|
-
if not v:
|
106
|
-
return v
|
107
|
-
|
108
|
-
validated = {}
|
109
|
-
for param_name, param_value in v.items():
|
110
|
-
if not isinstance(param_name, str) or not param_name.strip():
|
111
|
-
raise ValueError(
|
112
|
-
f"Parameter name must be a non-empty string: {param_name}"
|
113
|
-
)
|
114
|
-
|
115
|
-
if not isinstance(param_value, FlowParameter):
|
116
|
-
raise ValueError(
|
117
|
-
f"Parameter '{param_name}' must be a FlowParameter instance, "
|
118
|
-
f"got: {type(param_value)}"
|
119
|
-
)
|
120
|
-
|
121
|
-
validated[param_name.strip()] = param_value
|
122
|
-
|
123
|
-
return validated
|
124
|
-
|
125
98
|
@model_validator(mode="after")
|
126
99
|
def validate_block_names_unique(self) -> "Flow":
|
127
100
|
"""Ensure all block names are unique within the flow."""
|
@@ -215,17 +188,6 @@ class Flow(BaseModel):
|
|
215
188
|
except Exception as exc:
|
216
189
|
raise FlowValidationError(f"Invalid metadata configuration: {exc}") from exc
|
217
190
|
|
218
|
-
# Extract and validate parameters
|
219
|
-
parameters = {}
|
220
|
-
params_dict = flow_config.get("parameters", {})
|
221
|
-
for param_name, param_config in params_dict.items():
|
222
|
-
try:
|
223
|
-
parameters[param_name] = FlowParameter(**param_config)
|
224
|
-
except Exception as exc:
|
225
|
-
raise FlowValidationError(
|
226
|
-
f"Invalid parameter '{param_name}': {exc}"
|
227
|
-
) from exc
|
228
|
-
|
229
191
|
# Create blocks with validation
|
230
192
|
blocks = []
|
231
193
|
block_configs = flow_config.get("blocks", [])
|
@@ -254,7 +216,7 @@ class Flow(BaseModel):
|
|
254
216
|
|
255
217
|
# Create and validate the flow
|
256
218
|
try:
|
257
|
-
flow = cls(blocks=blocks, metadata=metadata
|
219
|
+
flow = cls(blocks=blocks, metadata=metadata)
|
258
220
|
# Persist generated id back to the YAML file (only on initial load)
|
259
221
|
# If the file had no metadata.id originally, update and rewrite
|
260
222
|
if not flow_config.get("metadata", {}).get("id"):
|
@@ -1049,6 +1011,8 @@ class Flow(BaseModel):
|
|
1049
1011
|
dataset: Dataset,
|
1050
1012
|
sample_size: int = 2,
|
1051
1013
|
runtime_params: Optional[dict[str, dict[str, Any]]] = None,
|
1014
|
+
max_concurrency: Optional[int] = None,
|
1015
|
+
enable_time_estimation: bool = False,
|
1052
1016
|
) -> dict[str, Any]:
|
1053
1017
|
"""Perform a dry run of the flow with a subset of data.
|
1054
1018
|
|
@@ -1060,11 +1024,18 @@ class Flow(BaseModel):
|
|
1060
1024
|
Number of samples to use for dry run testing.
|
1061
1025
|
runtime_params : Optional[Dict[str, Dict[str, Any]]], optional
|
1062
1026
|
Runtime parameters organized by block name.
|
1027
|
+
max_concurrency : Optional[int], optional
|
1028
|
+
Maximum concurrent requests for LLM blocks. If None, no limit is applied.
|
1029
|
+
enable_time_estimation : bool, default=False
|
1030
|
+
If True, estimates execution time for the full dataset and displays it
|
1031
|
+
in a Rich table. Automatically runs a second dry run if needed for
|
1032
|
+
accurate scaling analysis.
|
1063
1033
|
|
1064
1034
|
Returns
|
1065
1035
|
-------
|
1066
1036
|
Dict[str, Any]
|
1067
1037
|
Dry run results with execution info and sample outputs.
|
1038
|
+
Time estimation is displayed in a table but not included in return value.
|
1068
1039
|
|
1069
1040
|
Raises
|
1070
1041
|
------
|
@@ -1082,6 +1053,19 @@ class Flow(BaseModel):
|
|
1082
1053
|
|
1083
1054
|
validate_no_duplicates(dataset)
|
1084
1055
|
|
1056
|
+
# Validate max_concurrency parameter
|
1057
|
+
if max_concurrency is not None:
|
1058
|
+
if isinstance(max_concurrency, bool) or not isinstance(
|
1059
|
+
max_concurrency, int
|
1060
|
+
):
|
1061
|
+
raise FlowValidationError(
|
1062
|
+
f"max_concurrency must be an int, got {type(max_concurrency).__name__}"
|
1063
|
+
)
|
1064
|
+
if max_concurrency <= 0:
|
1065
|
+
raise FlowValidationError(
|
1066
|
+
f"max_concurrency must be greater than 0, got {max_concurrency}"
|
1067
|
+
)
|
1068
|
+
|
1085
1069
|
# Use smaller sample size if dataset is smaller
|
1086
1070
|
actual_sample_size = min(sample_size, len(dataset))
|
1087
1071
|
|
@@ -1099,6 +1083,7 @@ class Flow(BaseModel):
|
|
1099
1083
|
"flow_version": self.metadata.version,
|
1100
1084
|
"sample_size": actual_sample_size,
|
1101
1085
|
"original_dataset_size": len(dataset),
|
1086
|
+
"max_concurrency": max_concurrency,
|
1102
1087
|
"input_columns": dataset.column_names,
|
1103
1088
|
"blocks_executed": [],
|
1104
1089
|
"final_dataset": None,
|
@@ -1125,6 +1110,10 @@ class Flow(BaseModel):
|
|
1125
1110
|
# Prepare block execution parameters
|
1126
1111
|
block_kwargs = self._prepare_block_kwargs(block, runtime_params)
|
1127
1112
|
|
1113
|
+
# Add max_concurrency to block kwargs if provided
|
1114
|
+
if max_concurrency is not None:
|
1115
|
+
block_kwargs["_flow_max_concurrency"] = max_concurrency
|
1116
|
+
|
1128
1117
|
# Check if this is a deprecated block and skip validations
|
1129
1118
|
is_deprecated_block = (
|
1130
1119
|
hasattr(block, "__class__")
|
@@ -1142,7 +1131,9 @@ class Flow(BaseModel):
|
|
1142
1131
|
# Execute block with validation and logging
|
1143
1132
|
current_dataset = block(current_dataset, **block_kwargs)
|
1144
1133
|
|
1145
|
-
block_execution_time =
|
1134
|
+
block_execution_time = (
|
1135
|
+
time.perf_counter() - block_start_time
|
1136
|
+
) # Fixed: use perf_counter consistently
|
1146
1137
|
|
1147
1138
|
# Record block execution info
|
1148
1139
|
block_info = {
|
@@ -1181,6 +1172,12 @@ class Flow(BaseModel):
|
|
1181
1172
|
f"in {execution_time:.2f}s"
|
1182
1173
|
)
|
1183
1174
|
|
1175
|
+
# Perform time estimation if requested (displays table but doesn't store in results)
|
1176
|
+
if enable_time_estimation:
|
1177
|
+
self._estimate_total_time(
|
1178
|
+
dry_run_results, dataset, runtime_params, max_concurrency
|
1179
|
+
)
|
1180
|
+
|
1184
1181
|
return dry_run_results
|
1185
1182
|
|
1186
1183
|
except Exception as exc:
|
@@ -1193,6 +1190,103 @@ class Flow(BaseModel):
|
|
1193
1190
|
|
1194
1191
|
raise FlowValidationError(f"Dry run failed: {exc}") from exc
|
1195
1192
|
|
1193
|
+
def _estimate_total_time(
|
1194
|
+
self,
|
1195
|
+
first_run_results: dict[str, Any],
|
1196
|
+
dataset: Dataset,
|
1197
|
+
runtime_params: Optional[dict[str, dict[str, Any]]],
|
1198
|
+
max_concurrency: Optional[int],
|
1199
|
+
) -> dict[str, Any]:
|
1200
|
+
"""Estimate execution time using 2 dry runs (private method).
|
1201
|
+
|
1202
|
+
This method contains all the estimation logic. It determines if a second
|
1203
|
+
dry run is needed, executes it, and calls estimate_execution_time.
|
1204
|
+
|
1205
|
+
Parameters
|
1206
|
+
----------
|
1207
|
+
first_run_results : dict
|
1208
|
+
Results from the first dry run.
|
1209
|
+
dataset : Dataset
|
1210
|
+
Full dataset for estimation.
|
1211
|
+
runtime_params : Optional[dict]
|
1212
|
+
Runtime parameters.
|
1213
|
+
max_concurrency : Optional[int]
|
1214
|
+
Maximum concurrency.
|
1215
|
+
|
1216
|
+
Returns
|
1217
|
+
-------
|
1218
|
+
dict
|
1219
|
+
Estimation results with estimated_time_seconds, total_estimated_requests, etc.
|
1220
|
+
"""
|
1221
|
+
first_sample_size = first_run_results["sample_size"]
|
1222
|
+
|
1223
|
+
# Check if we need a second dry run
|
1224
|
+
has_async_blocks = any(
|
1225
|
+
getattr(block, "async_mode", False) for block in self.blocks
|
1226
|
+
)
|
1227
|
+
|
1228
|
+
# For sequential or no async blocks, single run is sufficient
|
1229
|
+
if max_concurrency == 1 or not has_async_blocks:
|
1230
|
+
estimation = estimate_execution_time(
|
1231
|
+
dry_run_1=first_run_results,
|
1232
|
+
dry_run_2=None,
|
1233
|
+
total_dataset_size=len(dataset),
|
1234
|
+
max_concurrency=max_concurrency,
|
1235
|
+
)
|
1236
|
+
else:
|
1237
|
+
# Need second measurement - always use canonical (1, 5) pair
|
1238
|
+
if first_sample_size == 1:
|
1239
|
+
# Already have 1, need 5
|
1240
|
+
logger.info("Running second dry run with 5 samples for time estimation")
|
1241
|
+
second_run = self.dry_run(
|
1242
|
+
dataset,
|
1243
|
+
5,
|
1244
|
+
runtime_params,
|
1245
|
+
max_concurrency,
|
1246
|
+
enable_time_estimation=False,
|
1247
|
+
)
|
1248
|
+
dry_run_1, dry_run_2 = first_run_results, second_run
|
1249
|
+
elif first_sample_size == 5:
|
1250
|
+
# Already have 5, need 1
|
1251
|
+
logger.info("Running second dry run with 1 sample for time estimation")
|
1252
|
+
second_run = self.dry_run(
|
1253
|
+
dataset,
|
1254
|
+
1,
|
1255
|
+
runtime_params,
|
1256
|
+
max_concurrency,
|
1257
|
+
enable_time_estimation=False,
|
1258
|
+
)
|
1259
|
+
dry_run_1, dry_run_2 = second_run, first_run_results
|
1260
|
+
else:
|
1261
|
+
# For other sizes: run both 1 and 5 for canonical pair
|
1262
|
+
logger.info("Running dry runs with 1 and 5 samples for time estimation")
|
1263
|
+
dry_run_1 = self.dry_run(
|
1264
|
+
dataset,
|
1265
|
+
1,
|
1266
|
+
runtime_params,
|
1267
|
+
max_concurrency,
|
1268
|
+
enable_time_estimation=False,
|
1269
|
+
)
|
1270
|
+
dry_run_2 = self.dry_run(
|
1271
|
+
dataset,
|
1272
|
+
5,
|
1273
|
+
runtime_params,
|
1274
|
+
max_concurrency,
|
1275
|
+
enable_time_estimation=False,
|
1276
|
+
)
|
1277
|
+
|
1278
|
+
estimation = estimate_execution_time(
|
1279
|
+
dry_run_1=dry_run_1,
|
1280
|
+
dry_run_2=dry_run_2,
|
1281
|
+
total_dataset_size=len(dataset),
|
1282
|
+
max_concurrency=max_concurrency,
|
1283
|
+
)
|
1284
|
+
|
1285
|
+
# Display estimation summary
|
1286
|
+
display_time_estimation_summary(estimation, len(dataset), max_concurrency)
|
1287
|
+
|
1288
|
+
return estimation
|
1289
|
+
|
1196
1290
|
def add_block(self, block: BaseBlock) -> "Flow":
|
1197
1291
|
"""Add a block to the flow, returning a new Flow instance.
|
1198
1292
|
|
@@ -1225,17 +1319,12 @@ class Flow(BaseModel):
|
|
1225
1319
|
# Create new flow with added block
|
1226
1320
|
new_blocks = self.blocks + [block]
|
1227
1321
|
|
1228
|
-
return Flow(
|
1229
|
-
blocks=new_blocks, metadata=self.metadata, parameters=self.parameters
|
1230
|
-
)
|
1322
|
+
return Flow(blocks=new_blocks, metadata=self.metadata)
|
1231
1323
|
|
1232
1324
|
def get_info(self) -> dict[str, Any]:
|
1233
1325
|
"""Get information about the flow."""
|
1234
1326
|
return {
|
1235
1327
|
"metadata": self.metadata.model_dump(),
|
1236
|
-
"parameters": {
|
1237
|
-
name: param.model_dump() for name, param in self.parameters.items()
|
1238
|
-
},
|
1239
1328
|
"blocks": [
|
1240
1329
|
{
|
1241
1330
|
"block_type": block.__class__.__name__,
|
@@ -1339,8 +1428,7 @@ class Flow(BaseModel):
|
|
1339
1428
|
|
1340
1429
|
The summary contains:
|
1341
1430
|
1. Flow metadata (name, version, author, description)
|
1342
|
-
2.
|
1343
|
-
3. A table of all blocks with their input and output columns
|
1431
|
+
2. A table of all blocks with their input and output columns
|
1344
1432
|
|
1345
1433
|
Notes
|
1346
1434
|
-----
|
@@ -1374,17 +1462,6 @@ class Flow(BaseModel):
|
|
1374
1462
|
f"Description: [white]{self.metadata.description}[/white]"
|
1375
1463
|
)
|
1376
1464
|
|
1377
|
-
# Parameters section
|
1378
|
-
if self.parameters:
|
1379
|
-
params_branch = flow_tree.add(
|
1380
|
-
"[bold bright_yellow]Parameters[/bold bright_yellow]"
|
1381
|
-
)
|
1382
|
-
for name, param in self.parameters.items():
|
1383
|
-
param_info = f"[bright_cyan]{name}[/bright_cyan]: [white]{param.type_hint}[/white]"
|
1384
|
-
if param.default is not None:
|
1385
|
-
param_info += f" = [bright_white]{param.default}[/bright_white]"
|
1386
|
-
params_branch.add(param_info)
|
1387
|
-
|
1388
1465
|
# Blocks overview
|
1389
1466
|
flow_tree.add(
|
1390
1467
|
f"[bold bright_magenta]Blocks[/bold bright_magenta] ({len(self.blocks)} total)"
|
@@ -1446,11 +1523,6 @@ class Flow(BaseModel):
|
|
1446
1523
|
],
|
1447
1524
|
}
|
1448
1525
|
|
1449
|
-
if self.parameters:
|
1450
|
-
config["parameters"] = {
|
1451
|
-
name: param.model_dump() for name, param in self.parameters.items()
|
1452
|
-
}
|
1453
|
-
|
1454
1526
|
save_flow_yaml(output_path, config)
|
1455
1527
|
|
1456
1528
|
def __len__(self) -> int:
|
sdg_hub/core/flow/metadata.py
CHANGED
@@ -2,9 +2,8 @@
|
|
2
2
|
"""Flow metadata and parameter definitions."""
|
3
3
|
|
4
4
|
# Standard
|
5
|
-
from datetime import datetime
|
6
5
|
from enum import Enum
|
7
|
-
from typing import
|
6
|
+
from typing import Optional
|
8
7
|
|
9
8
|
# Third Party
|
10
9
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
@@ -118,39 +117,6 @@ class RecommendedModels(BaseModel):
|
|
118
117
|
return None
|
119
118
|
|
120
119
|
|
121
|
-
class FlowParameter(BaseModel):
|
122
|
-
"""Represents a runtime parameter for a flow.
|
123
|
-
|
124
|
-
Attributes
|
125
|
-
----------
|
126
|
-
default : Any
|
127
|
-
Default value for the parameter.
|
128
|
-
description : str
|
129
|
-
Human-readable description of the parameter.
|
130
|
-
type_hint : str
|
131
|
-
Type hint as string (e.g., "float", "str").
|
132
|
-
required : bool
|
133
|
-
Whether this parameter is required at runtime.
|
134
|
-
constraints : Dict[str, Any]
|
135
|
-
Additional constraints for the parameter.
|
136
|
-
"""
|
137
|
-
|
138
|
-
default: Any = Field(..., description="Default value for the parameter")
|
139
|
-
description: str = Field(default="", description="Human-readable description")
|
140
|
-
type_hint: str = Field(default="Any", description="Type hint as string")
|
141
|
-
required: bool = Field(default=False, description="Whether parameter is required")
|
142
|
-
constraints: dict[str, Any] = Field(
|
143
|
-
default_factory=dict, description="Additional constraints for the parameter"
|
144
|
-
)
|
145
|
-
|
146
|
-
@model_validator(mode="after")
|
147
|
-
def validate_required_default(self) -> "FlowParameter":
|
148
|
-
"""Validate that required parameters have appropriate defaults."""
|
149
|
-
if self.required and self.default is None:
|
150
|
-
raise ValueError("Required parameters cannot have None as default")
|
151
|
-
return self
|
152
|
-
|
153
|
-
|
154
120
|
class DatasetRequirements(BaseModel):
|
155
121
|
"""Dataset requirements for flow execution.
|
156
122
|
|
@@ -255,20 +221,10 @@ class FlowMetadata(BaseModel):
|
|
255
221
|
Simplified recommended models structure with default, compatible, and experimental lists.
|
256
222
|
tags : List[str]
|
257
223
|
Tags for categorization and search.
|
258
|
-
created_at : str
|
259
|
-
Creation timestamp.
|
260
|
-
updated_at : str
|
261
|
-
Last update timestamp.
|
262
224
|
license : str
|
263
225
|
License identifier.
|
264
|
-
min_sdg_hub_version : str
|
265
|
-
Minimum required SDG Hub version.
|
266
226
|
dataset_requirements : Optional[DatasetRequirements]
|
267
227
|
Requirements for input datasets.
|
268
|
-
estimated_cost : str
|
269
|
-
Estimated cost tier for running the flow.
|
270
|
-
estimated_duration : str
|
271
|
-
Estimated duration for flow execution.
|
272
228
|
"""
|
273
229
|
|
274
230
|
name: str = Field(..., min_length=1, description="Human-readable name")
|
@@ -288,29 +244,10 @@ class FlowMetadata(BaseModel):
|
|
288
244
|
tags: list[str] = Field(
|
289
245
|
default_factory=list, description="Tags for categorization and search"
|
290
246
|
)
|
291
|
-
created_at: str = Field(
|
292
|
-
default_factory=lambda: datetime.now().isoformat(),
|
293
|
-
description="Creation timestamp",
|
294
|
-
)
|
295
|
-
updated_at: str = Field(
|
296
|
-
default_factory=lambda: datetime.now().isoformat(),
|
297
|
-
description="Last update timestamp",
|
298
|
-
)
|
299
247
|
license: str = Field(default="Apache-2.0", description="License identifier")
|
300
|
-
min_sdg_hub_version: str = Field(
|
301
|
-
default="", description="Minimum required SDG Hub version"
|
302
|
-
)
|
303
248
|
dataset_requirements: Optional[DatasetRequirements] = Field(
|
304
249
|
default=None, description="Requirements for input datasets"
|
305
250
|
)
|
306
|
-
estimated_cost: str = Field(
|
307
|
-
default="medium",
|
308
|
-
pattern="^(low|medium|high)$",
|
309
|
-
description="Estimated cost tier for running the flow",
|
310
|
-
)
|
311
|
-
estimated_duration: str = Field(
|
312
|
-
default="", description="Estimated duration for flow execution"
|
313
|
-
)
|
314
251
|
|
315
252
|
@field_validator("id")
|
316
253
|
@classmethod
|
@@ -352,10 +289,6 @@ class FlowMetadata(BaseModel):
|
|
352
289
|
# Validation is handled within RecommendedModels class
|
353
290
|
return v
|
354
291
|
|
355
|
-
def update_timestamp(self) -> None:
|
356
|
-
"""Update the updated_at timestamp."""
|
357
|
-
self.updated_at = datetime.now().isoformat()
|
358
|
-
|
359
292
|
@model_validator(mode="after")
|
360
293
|
def ensure_id(self) -> "FlowMetadata":
|
361
294
|
"""Ensure id is set.
|
sdg_hub/core/flow/registry.py
CHANGED
sdg_hub/core/utils/__init__.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
# SPDX-License-Identifier: Apache-2.0
|
2
2
|
|
3
3
|
# Local
|
4
|
-
from .flow_identifier import get_flow_identifier
|
5
|
-
from .path_resolution import resolve_path
|
4
|
+
from .flow_identifier import get_flow_identifier as get_flow_identifier
|
5
|
+
from .path_resolution import resolve_path as resolve_path
|
6
|
+
from .time_estimator import estimate_execution_time as estimate_execution_time
|
7
|
+
from .time_estimator import is_llm_using_block as is_llm_using_block
|
6
8
|
|
7
9
|
|
8
10
|
# This is part of the public API, and used by instructlab
|
@@ -10,4 +12,10 @@ class GenerateError(Exception):
|
|
10
12
|
"""An exception raised during generate step."""
|
11
13
|
|
12
14
|
|
13
|
-
__all__ = [
|
15
|
+
__all__ = [
|
16
|
+
"GenerateError",
|
17
|
+
"resolve_path",
|
18
|
+
"get_flow_identifier",
|
19
|
+
"estimate_execution_time",
|
20
|
+
"is_llm_using_block",
|
21
|
+
]
|
@@ -188,6 +188,122 @@ def display_metrics_summary(
|
|
188
188
|
console.print()
|
189
189
|
|
190
190
|
|
191
|
+
def display_time_estimation_summary(
|
192
|
+
time_estimation: dict[str, Any],
|
193
|
+
dataset_size: int,
|
194
|
+
max_concurrency: Optional[int] = None,
|
195
|
+
) -> None:
|
196
|
+
"""Display a rich table summarizing time estimation results.
|
197
|
+
|
198
|
+
Parameters
|
199
|
+
----------
|
200
|
+
time_estimation : dict[str, Any]
|
201
|
+
Time estimation results from estimate_total_time().
|
202
|
+
dataset_size : int
|
203
|
+
Total number of samples in the dataset.
|
204
|
+
max_concurrency : Optional[int], optional
|
205
|
+
Maximum concurrency used for estimation.
|
206
|
+
"""
|
207
|
+
console = Console()
|
208
|
+
|
209
|
+
# Create main summary table
|
210
|
+
summary_table = Table(
|
211
|
+
show_header=False,
|
212
|
+
box=None,
|
213
|
+
padding=(0, 1),
|
214
|
+
)
|
215
|
+
summary_table.add_column("Metric", style="bright_cyan")
|
216
|
+
summary_table.add_column("Value", style="bright_white")
|
217
|
+
|
218
|
+
# Format time
|
219
|
+
est_seconds = time_estimation["estimated_time_seconds"]
|
220
|
+
if est_seconds < 60:
|
221
|
+
time_str = f"{est_seconds:.1f} seconds"
|
222
|
+
elif est_seconds < 3600:
|
223
|
+
time_str = f"{est_seconds / 60:.1f} minutes ({est_seconds / 3600:.2f} hours)"
|
224
|
+
else:
|
225
|
+
time_str = f"{est_seconds / 3600:.2f} hours ({est_seconds / 60:.0f} minutes)"
|
226
|
+
|
227
|
+
summary_table.add_row("Estimated Time:", time_str)
|
228
|
+
summary_table.add_row(
|
229
|
+
"Total LLM Requests:", f"{time_estimation.get('total_estimated_requests', 0):,}"
|
230
|
+
)
|
231
|
+
|
232
|
+
if time_estimation.get("total_estimated_requests", 0) > 0:
|
233
|
+
requests_per_sample = time_estimation["total_estimated_requests"] / dataset_size
|
234
|
+
summary_table.add_row("Requests per Sample:", f"{requests_per_sample:.1f}")
|
235
|
+
|
236
|
+
if max_concurrency is not None:
|
237
|
+
summary_table.add_row("Max Concurrency:", str(max_concurrency))
|
238
|
+
|
239
|
+
# Display summary panel
|
240
|
+
console.print()
|
241
|
+
console.print(
|
242
|
+
Panel(
|
243
|
+
summary_table,
|
244
|
+
title=f"[bold bright_white]Time Estimation for {dataset_size:,} Samples[/bold bright_white]",
|
245
|
+
border_style="bright_blue",
|
246
|
+
)
|
247
|
+
)
|
248
|
+
|
249
|
+
# Display per-block breakdown if available
|
250
|
+
block_estimates = time_estimation.get("block_estimates", [])
|
251
|
+
if block_estimates:
|
252
|
+
console.print()
|
253
|
+
|
254
|
+
# Create per-block table
|
255
|
+
block_table = Table(
|
256
|
+
show_header=True,
|
257
|
+
header_style="bold bright_white",
|
258
|
+
)
|
259
|
+
block_table.add_column("Block Name", style="bright_cyan", width=20)
|
260
|
+
block_table.add_column("Time", justify="right", style="bright_yellow", width=10)
|
261
|
+
block_table.add_column(
|
262
|
+
"Requests", justify="right", style="bright_green", width=10
|
263
|
+
)
|
264
|
+
block_table.add_column(
|
265
|
+
"Throughput", justify="right", style="bright_blue", width=12
|
266
|
+
)
|
267
|
+
block_table.add_column(
|
268
|
+
"Amplif.", justify="right", style="bright_magenta", width=10
|
269
|
+
)
|
270
|
+
|
271
|
+
for block in block_estimates:
|
272
|
+
# Format time
|
273
|
+
block_seconds = block["estimated_time"]
|
274
|
+
if block_seconds < 60:
|
275
|
+
time_str = f"{block_seconds:.1f}s"
|
276
|
+
else:
|
277
|
+
time_str = f"{block_seconds / 60:.1f}min"
|
278
|
+
|
279
|
+
# Format requests
|
280
|
+
requests_str = f"{block['estimated_requests']:,.0f}"
|
281
|
+
|
282
|
+
# Format throughput
|
283
|
+
throughput_str = f"{block['throughput']:.2f}/s"
|
284
|
+
|
285
|
+
# Format amplification
|
286
|
+
amplif_str = f"{block['amplification']:.1f}x"
|
287
|
+
|
288
|
+
block_table.add_row(
|
289
|
+
block["block"],
|
290
|
+
time_str,
|
291
|
+
requests_str,
|
292
|
+
throughput_str,
|
293
|
+
amplif_str,
|
294
|
+
)
|
295
|
+
|
296
|
+
console.print(
|
297
|
+
Panel(
|
298
|
+
block_table,
|
299
|
+
title="[bold bright_white]Per-Block Breakdown[/bold bright_white]",
|
300
|
+
border_style="bright_blue",
|
301
|
+
)
|
302
|
+
)
|
303
|
+
|
304
|
+
console.print()
|
305
|
+
|
306
|
+
|
191
307
|
def save_metrics_to_json(
|
192
308
|
block_metrics: list[dict[str, Any]],
|
193
309
|
flow_name: str,
|
@@ -0,0 +1,344 @@
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
2
|
+
"""Time estimation utility for predicting full dataset execution time from dry_run results."""
|
3
|
+
|
4
|
+
# Standard
|
5
|
+
from typing import Dict, Optional
|
6
|
+
import math
|
7
|
+
|
8
|
+
# Default max concurrent requests used during dry runs
|
9
|
+
DRY_RUN_MAX_CONCURRENT = 100
|
10
|
+
|
11
|
+
# Conservative estimation factor (20% buffer for API variability, network latency, etc.)
|
12
|
+
ESTIMATION_BUFFER_FACTOR = 1.2
|
13
|
+
|
14
|
+
|
15
|
+
def is_llm_using_block(block_info: Dict) -> bool:
|
16
|
+
"""Detect if a block uses LLMs.
|
17
|
+
|
18
|
+
Identifies blocks that make LLM API calls based on their type or parameters.
|
19
|
+
This is used to calculate request amplification for LLM blocks.
|
20
|
+
|
21
|
+
Parameters
|
22
|
+
----------
|
23
|
+
block_info : Dict
|
24
|
+
Block information from dry_run results containing block_type and parameters_used.
|
25
|
+
|
26
|
+
Returns
|
27
|
+
-------
|
28
|
+
bool
|
29
|
+
True if the block uses LLMs, False otherwise.
|
30
|
+
|
31
|
+
Examples
|
32
|
+
--------
|
33
|
+
>>> block = {"block_type": "LLMChatBlock", "parameters_used": {"model": "gpt-4"}}
|
34
|
+
>>> is_llm_using_block(block)
|
35
|
+
True
|
36
|
+
"""
|
37
|
+
block_type = block_info.get("block_type", "")
|
38
|
+
|
39
|
+
# Direct LLM blocks or evaluation/verification blocks
|
40
|
+
if any(kw in block_type for kw in ["LLMChatBlock", "Evaluate", "Verify"]):
|
41
|
+
return True
|
42
|
+
|
43
|
+
# Check for model parameters
|
44
|
+
params = block_info.get("parameters_used", {})
|
45
|
+
if any(key in params for key in ["model", "api_base", "api_key"]):
|
46
|
+
return True
|
47
|
+
|
48
|
+
return False
|
49
|
+
|
50
|
+
|
51
|
+
def calculate_block_throughput(
|
52
|
+
block_1: Dict, block_2: Dict, samples_1: int, samples_2: int
|
53
|
+
) -> Dict:
|
54
|
+
"""Calculate throughput and amplification from two dry runs.
|
55
|
+
|
56
|
+
Analyzes performance metrics from two dry runs with different sample sizes
|
57
|
+
to estimate throughput (requests/second), amplification factor, and startup overhead.
|
58
|
+
|
59
|
+
Parameters
|
60
|
+
----------
|
61
|
+
block_1 : Dict
|
62
|
+
Block execution info from first dry run.
|
63
|
+
block_2 : Dict
|
64
|
+
Block execution info from second dry run.
|
65
|
+
samples_1 : int
|
66
|
+
Number of samples in first dry run.
|
67
|
+
samples_2 : int
|
68
|
+
Number of samples in second dry run.
|
69
|
+
|
70
|
+
Returns
|
71
|
+
-------
|
72
|
+
Dict
|
73
|
+
Dictionary containing:
|
74
|
+
- throughput: float, requests per second
|
75
|
+
- amplification: float, average requests per input sample
|
76
|
+
- startup_overhead: float, fixed startup time in seconds
|
77
|
+
|
78
|
+
Raises
|
79
|
+
------
|
80
|
+
ValueError
|
81
|
+
If throughput cannot be calculated due to invalid measurements.
|
82
|
+
|
83
|
+
Examples
|
84
|
+
--------
|
85
|
+
>>> block1 = {"execution_time_seconds": 1.0, "input_rows": 1, "block_name": "test"}
|
86
|
+
>>> block2 = {"execution_time_seconds": 2.0, "input_rows": 5, "block_name": "test"}
|
87
|
+
>>> result = calculate_block_throughput(block1, block2, 1, 5)
|
88
|
+
>>> assert result["throughput"] > 0
|
89
|
+
"""
|
90
|
+
time_1 = block_1.get("execution_time_seconds", 0)
|
91
|
+
time_2 = block_2.get("execution_time_seconds", 0)
|
92
|
+
requests_1 = block_1.get("input_rows", 0)
|
93
|
+
requests_2 = block_2.get("input_rows", 0)
|
94
|
+
|
95
|
+
# Calculate amplification (requests per sample)
|
96
|
+
amp_1 = requests_1 / samples_1 if samples_1 > 0 else 1
|
97
|
+
amp_2 = requests_2 / samples_2 if samples_2 > 0 else 1
|
98
|
+
avg_amplification = (amp_1 + amp_2) / 2
|
99
|
+
|
100
|
+
# Use linear scaling to extract throughput and overhead from two data points
|
101
|
+
# Model: time = startup_overhead + (requests / throughput)
|
102
|
+
|
103
|
+
if requests_2 > requests_1 and time_2 > time_1:
|
104
|
+
# Calculate marginal time per request (slope of the line)
|
105
|
+
marginal_time = (time_2 - time_1) / (requests_2 - requests_1)
|
106
|
+
|
107
|
+
# Throughput is the inverse of marginal time
|
108
|
+
measured_throughput = 1.0 / marginal_time if marginal_time > 0 else 0
|
109
|
+
|
110
|
+
# Y-intercept is the startup overhead
|
111
|
+
startup_overhead = max(0, time_1 - (requests_1 * marginal_time))
|
112
|
+
else:
|
113
|
+
# Fallback to simple calculation if we don't have good data for scaling
|
114
|
+
throughput_1 = requests_1 / time_1 if time_1 > 0 else 0
|
115
|
+
throughput_2 = requests_2 / time_2 if time_2 > 0 else 0
|
116
|
+
measured_throughput = max(throughput_1, throughput_2)
|
117
|
+
|
118
|
+
# Estimate overhead as a small fraction of time
|
119
|
+
startup_overhead = min(2.0, time_1 * 0.1) # Assume 10% overhead, max 2 seconds
|
120
|
+
|
121
|
+
# If we have no valid measurements, raise an error
|
122
|
+
if measured_throughput == 0:
|
123
|
+
raise ValueError(
|
124
|
+
f"Cannot calculate throughput for block '{block_1.get('block_name', 'unknown')}': "
|
125
|
+
f"No valid measurements from dry runs (time_1={time_1}, time_2={time_2}, "
|
126
|
+
f"requests_1={requests_1}, requests_2={requests_2})"
|
127
|
+
)
|
128
|
+
|
129
|
+
return {
|
130
|
+
"throughput": measured_throughput,
|
131
|
+
"amplification": avg_amplification,
|
132
|
+
"startup_overhead": startup_overhead,
|
133
|
+
}
|
134
|
+
|
135
|
+
|
136
|
+
def calculate_time_with_pipeline(
    num_requests: float,
    throughput: float,
    startup_overhead: float,
    max_concurrent: int = DRY_RUN_MAX_CONCURRENT,
) -> float:
    """Estimate wall-clock time for ``num_requests`` under a pipeline model.

    The model is ``time = startup_overhead + num_requests / effective_rate``,
    where the effective rate grows slightly (logarithmically, capped at +10%)
    with the allowed concurrency to reflect modest pipelining gains.

    Parameters
    ----------
    num_requests : float
        Total number of requests to process.
    throughput : float
        Measured base throughput, in requests per second.
    startup_overhead : float
        Fixed one-time overhead, in seconds.
    max_concurrent : int, optional
        Concurrency ceiling; values of ``None`` or <= 0 are treated as 1
        (pure sequential execution). Defaults to ``DRY_RUN_MAX_CONCURRENT``.

    Returns
    -------
    float
        Estimated total execution time in seconds (0 for non-positive
        ``num_requests``).

    Examples
    --------
    >>> t = calculate_time_with_pipeline(1000, 10.0, 0.5, 50)
    >>> assert t > 0
    """
    # Nothing to do for an empty workload.
    if num_requests <= 0:
        return 0

    # Normalize the concurrency limit; guards against None and non-positive
    # values that would otherwise break the math below.
    concurrency = max_concurrent if (max_concurrent and max_concurrent > 0) else 1

    if concurrency == 1:
        # Sequential execution: no pipelining benefit at all.
        rate = throughput
    else:
        # Logarithmic boost models diminishing returns; capped at a
        # conservative 10% improvement over the measured throughput.
        boost = min(1.1, 1.0 + 0.1 * math.log(concurrency) / math.log(100))
        rate = throughput * boost

    # Fixed startup cost plus steady-state processing time.
    return startup_overhead + num_requests / rate
|
192
|
+
|
193
|
+
|
194
|
+
def estimate_execution_time(
    dry_run_1: Dict,
    dry_run_2: Optional[Dict] = None,
    total_dataset_size: Optional[int] = None,
    max_concurrency: Optional[int] = None,
) -> Dict:
    """Project full-dataset execution time from one or two dry runs.

    With a single dry run the estimate is a simple linear scale-up of the
    observed (per-block, when available) time per row — appropriate for
    synchronous flows. With two dry runs, per-block throughput, request
    amplification, and startup overhead are derived and fed through the
    pipeline model, which accounts for concurrency benefits on LLM blocks.
    Every estimate is inflated by ``ESTIMATION_BUFFER_FACTOR`` to absorb
    API variability and network latency.

    Parameters
    ----------
    dry_run_1 : Dict
        First dry-run result; must provide 'sample_size' and
        'execution_time_seconds'.
    dry_run_2 : Optional[Dict], optional
        Second dry-run result enabling the async/throughput model,
        by default None.
    total_dataset_size : Optional[int], optional
        Target dataset size. Falls back to ``dry_run_1``'s
        'original_dataset_size' (or its 'sample_size') when None.
    max_concurrency : Optional[int], optional
        Concurrent-request ceiling; defaults to ``DRY_RUN_MAX_CONCURRENT``.

    Returns
    -------
    Dict
        - estimated_time_seconds: float, buffered time estimate
        - total_estimated_requests: int, projected LLM requests (0 for sync)
        - block_estimates: list, per-block breakdown (async path only)
        - note: str, extra context (sync paths only)

    Examples
    --------
    >>> dry_run = {"sample_size": 2, "execution_time_seconds": 10.0}
    >>> result = estimate_execution_time(dry_run, total_dataset_size=100)
    >>> assert result["estimated_time_seconds"] > 0
    """
    # Resolve defaults up front.
    concurrency_limit = (
        DRY_RUN_MAX_CONCURRENT if max_concurrency is None else max_concurrency
    )

    dataset_size = total_dataset_size
    if dataset_size is None:
        dataset_size = dry_run_1.get(
            "original_dataset_size", dry_run_1["sample_size"]
        )

    samples_1 = dry_run_1["sample_size"]
    # 5 mirrors the default second-run sample size when none was supplied.
    samples_2 = dry_run_2["sample_size"] if dry_run_2 else 5

    # --- Single dry run: synchronous linear scaling -----------------------
    if dry_run_2 is None:
        block_records = dry_run_1.get("blocks_executed", [])

        if not block_records:
            # No per-block detail: scale the whole-run time per sample.
            elapsed = dry_run_1["execution_time_seconds"]
            scaled = (elapsed / samples_1) * dataset_size
            return {
                "estimated_time_seconds": scaled * ESTIMATION_BUFFER_FACTOR,
                "total_estimated_requests": 0,
                "note": "Synchronous execution - linear scaling from dry run",
            }

        # Scale each block's time-per-row independently and sum.
        projected_total = 0
        for record in block_records:
            rows = record.get("input_rows", samples_1)
            if rows > 0:
                per_row = record.get("execution_time_seconds", 0) / rows
                projected_total += per_row * dataset_size

        return {
            "estimated_time_seconds": projected_total * ESTIMATION_BUFFER_FACTOR,
            "total_estimated_requests": 0,
            "note": "Synchronous execution - no concurrency",
        }

    # --- Two dry runs: per-block async throughput model -------------------
    per_block_estimates = []
    aggregate_time = 0
    aggregate_requests = 0

    first_run_blocks = dry_run_1.get("blocks_executed", [])
    for idx, first_block in enumerate(first_run_blocks):
        # Stop once the second run has no matching block to compare against.
        if idx >= len(dry_run_2.get("blocks_executed", [])):
            break

        second_block = dry_run_2["blocks_executed"][idx]

        # Non-LLM blocks contribute negligible time in this model.
        if not is_llm_using_block(first_block):
            continue

        # Derive throughput, amplification, and overhead from the two runs.
        stats = calculate_block_throughput(
            first_block, second_block, samples_1, samples_2
        )

        # Requests scale with the dataset via the measured amplification.
        projected_requests = dataset_size * stats["amplification"]

        projected_time = calculate_time_with_pipeline(
            projected_requests,
            stats["throughput"],
            stats["startup_overhead"],
            concurrency_limit,
        )

        aggregate_time += projected_time
        aggregate_requests += projected_requests

        per_block_estimates.append(
            {
                "block": first_block["block_name"],
                "estimated_requests": projected_requests,
                "throughput": stats["throughput"],
                "estimated_time": projected_time,
                "amplification": stats["amplification"],
                "startup_overhead": stats["startup_overhead"],
            }
        )

    # Buffer the total for API variability, network hiccups, etc.
    return {
        "estimated_time_seconds": aggregate_time * ESTIMATION_BUFFER_FACTOR,
        "total_estimated_requests": int(aggregate_requests),
        "block_estimates": per_block_estimates,
    }
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml
CHANGED
@@ -19,8 +19,7 @@ metadata:
|
|
19
19
|
- "japanese"
|
20
20
|
|
21
21
|
license: "Apache-2.0"
|
22
|
-
|
23
|
-
|
22
|
+
|
24
23
|
dataset_requirements:
|
25
24
|
required_columns:
|
26
25
|
- "document"
|
@@ -55,17 +54,19 @@ blocks:
|
|
55
54
|
output_cols: raw_summary_detailed
|
56
55
|
max_tokens: 2048
|
57
56
|
async_mode: true
|
57
|
+
# n: 2
|
58
58
|
|
59
59
|
- block_type: LLMParserBlock
|
60
60
|
block_config:
|
61
|
-
block_name:
|
61
|
+
block_name: detailed_summary
|
62
62
|
input_cols: raw_summary_detailed
|
63
63
|
extract_content: true
|
64
|
+
# extract_reasoning_content: true
|
64
65
|
|
65
66
|
- block_type: TextParserBlock
|
66
67
|
block_config:
|
67
68
|
block_name: parse_detailed_summary
|
68
|
-
input_cols:
|
69
|
+
input_cols: detailed_summary_content
|
69
70
|
output_cols: summary_detailed
|
70
71
|
start_tags: [""]
|
71
72
|
end_tags: [""]
|
@@ -87,14 +88,14 @@ blocks:
|
|
87
88
|
|
88
89
|
- block_type: LLMParserBlock
|
89
90
|
block_config:
|
90
|
-
block_name:
|
91
|
+
block_name: atomic_facts
|
91
92
|
input_cols: raw_atomic_facts
|
92
93
|
extract_content: true
|
93
94
|
|
94
95
|
- block_type: TextParserBlock
|
95
96
|
block_config:
|
96
97
|
block_name: parse_atomic_facts
|
97
|
-
input_cols:
|
98
|
+
input_cols: atomic_facts_content
|
98
99
|
output_cols: summary_atomic_facts
|
99
100
|
start_tags: [""]
|
100
101
|
end_tags: [""]
|
@@ -116,14 +117,14 @@ blocks:
|
|
116
117
|
|
117
118
|
- block_type: LLMParserBlock
|
118
119
|
block_config:
|
119
|
-
block_name:
|
120
|
+
block_name: extractive_summary
|
120
121
|
input_cols: raw_summary_extractive
|
121
122
|
extract_content: true
|
122
123
|
|
123
124
|
- block_type: TextParserBlock
|
124
125
|
block_config:
|
125
126
|
block_name: parse_extractive_summary
|
126
|
-
input_cols:
|
127
|
+
input_cols: extractive_summary_content
|
127
128
|
output_cols: summary_extractive
|
128
129
|
start_tags: [""]
|
129
130
|
end_tags: [""]
|
@@ -157,14 +158,14 @@ blocks:
|
|
157
158
|
|
158
159
|
- block_type: LLMParserBlock
|
159
160
|
block_config:
|
160
|
-
block_name:
|
161
|
+
block_name: get_knowledge_generation
|
161
162
|
input_cols: raw_knowledge_generation
|
162
163
|
extract_content: true
|
163
164
|
|
164
165
|
- block_type: TextParserBlock
|
165
166
|
block_config:
|
166
167
|
block_name: parse_knowledge_generation
|
167
|
-
input_cols:
|
168
|
+
input_cols: get_knowledge_generation_content
|
168
169
|
output_cols: [question, response]
|
169
170
|
parsing_pattern: "\\[(?:Question|QUESTION)\\]\\s*(.*?)\\s*\\[(?:Answer|ANSWER)\\]\\s*(.*?)\\s*(?=\\[(?:Question|QUESTION)\\]|$)"
|
170
171
|
parser_cleanup_tags: ["[END]"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: sdg_hub
|
3
|
-
Version: 0.4.
|
3
|
+
Version: 0.4.2
|
4
4
|
Summary: Synthetic Data Generation
|
5
5
|
Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
|
6
6
|
License: Apache-2.0
|
@@ -65,6 +65,7 @@ Requires-Dist: pytest-html; extra == "dev"
|
|
65
65
|
Requires-Dist: tox<5,>=4.4.2; extra == "dev"
|
66
66
|
Requires-Dist: ruff; extra == "dev"
|
67
67
|
Requires-Dist: pytest-env; extra == "dev"
|
68
|
+
Requires-Dist: nbconvert>=7.0.0; extra == "dev"
|
68
69
|
Dynamic: license-file
|
69
70
|
|
70
71
|
# `sdg_hub`: Synthetic Data Generation Toolkit
|
@@ -1,7 +1,7 @@
|
|
1
|
-
sdg_hub/__init__.py,sha256=
|
2
|
-
sdg_hub/_version.py,sha256=
|
1
|
+
sdg_hub/__init__.py,sha256=TlkZT40-70urdcWLqv3kupaJj8s-SVgd2QyvlSFwb4A,510
|
2
|
+
sdg_hub/_version.py,sha256=A45grTqzrHuDn1CT9K5GVUbY4_Q3OSTcXAl3zdHzcEI,704
|
3
3
|
sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
sdg_hub/core/__init__.py,sha256=
|
4
|
+
sdg_hub/core/__init__.py,sha256=e3BoejbqjYhasf9t__L4qE52lkD9EBjx4o--2kqKdro,460
|
5
5
|
sdg_hub/core/blocks/__init__.py,sha256=5FsbkcO-dmBv6MqO96TPn9FKKPTQZQCv20j4wR7UvQw,1502
|
6
6
|
sdg_hub/core/blocks/base.py,sha256=-SOdBpJwtRTMsrmCEuLjUBQMRCo_PLYlHEBRrz8sF9g,13031
|
7
7
|
sdg_hub/core/blocks/registry.py,sha256=FuEN_pnq-nSH1LguY3_oCubT6Kz3SuJjk3TcUpLT-lw,10695
|
@@ -32,21 +32,22 @@ sdg_hub/core/blocks/transform/melt_columns.py,sha256=vaYa5Taq6GhNZYWFL4uPK3-SfN2
|
|
32
32
|
sdg_hub/core/blocks/transform/rename_columns.py,sha256=qeB5L2utqDQnutUetH1VKZSqDiJSH_yUp5EFCV-XCVI,1998
|
33
33
|
sdg_hub/core/blocks/transform/text_concat.py,sha256=_-B__Hob1WwgwkILPIZvTnsDzuwtoX1hKviyzHlnnes,3149
|
34
34
|
sdg_hub/core/blocks/transform/uniform_col_val_setter.py,sha256=XnjiT29z3PzIPy8M-mmE2w-Miab6Ed5ahy32SaxTCTE,3263
|
35
|
-
sdg_hub/core/flow/__init__.py,sha256=
|
36
|
-
sdg_hub/core/flow/base.py,sha256=
|
35
|
+
sdg_hub/core/flow/__init__.py,sha256=0_m_htuZfPxk8xQ9IKfp0Pz-JRE4O7lYMUFrKyLNoLA,409
|
36
|
+
sdg_hub/core/flow/base.py,sha256=4kR-dKXAlLFSwm3YWdT8EoedCIGJT56agcot3tQb6VY,59508
|
37
37
|
sdg_hub/core/flow/checkpointer.py,sha256=stm5ZtjjEiLk9ZkAAnoQQn5Y8Yl_d7qCsQLZTrCXR48,11867
|
38
|
-
sdg_hub/core/flow/metadata.py,sha256=
|
38
|
+
sdg_hub/core/flow/metadata.py,sha256=cFrpJjWOaK87aCuRFyC3Pdf83oYU93mrmZEMdUnhsN8,10540
|
39
39
|
sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d4,7558
|
40
|
-
sdg_hub/core/flow/registry.py,sha256=
|
40
|
+
sdg_hub/core/flow/registry.py,sha256=N6KfX-L7QRkooznIFxDuhRZYuDA5g3N5zC-KRm2jVhk,12109
|
41
41
|
sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
|
42
|
-
sdg_hub/core/utils/__init__.py,sha256=
|
42
|
+
sdg_hub/core/utils/__init__.py,sha256=KcT56JhobC5sBg0MKEMn5hc4OyKa9_Vnn45Mt_kS4jQ,610
|
43
43
|
sdg_hub/core/utils/datautils.py,sha256=__HkUe1DxcJVHKrFX68z_hDXwxJygBlJDfjJLnj7rHc,4230
|
44
44
|
sdg_hub/core/utils/error_handling.py,sha256=yku8cGj_nKCyXDsnb-mHCpgukkkAMucJ4iAUrIzqysc,5510
|
45
45
|
sdg_hub/core/utils/flow_id_words.yaml,sha256=5QHpQdP7zwahRuooyAlJIwBY7WcDR7vtbJXxVJqujbg,2317
|
46
46
|
sdg_hub/core/utils/flow_identifier.py,sha256=aAHfK_G9AwEtMglLRMdMpi_AI1dciub5UqBGm4yb2HE,2841
|
47
|
-
sdg_hub/core/utils/flow_metrics.py,sha256=
|
47
|
+
sdg_hub/core/utils/flow_metrics.py,sha256=3G-xbfr-rFA578wV4KUbQePTMVGZHr9-rXvyYL4Kt2Q,12604
|
48
48
|
sdg_hub/core/utils/logger_config.py,sha256=6_cnsIHtSAdq1iTTZ7Q7nAJ1dmldlxSZ0AB49yLiQ20,2034
|
49
49
|
sdg_hub/core/utils/path_resolution.py,sha256=yWof4kGNpQ5dKcrVHg0h9KfOKLZ6ROjdfsLAZsQT5rM,2000
|
50
|
+
sdg_hub/core/utils/time_estimator.py,sha256=rM3_R-Ka5DEtvOtlJoA_5pXSyQ6tT6t4h6qh3_5BCZo,12639
|
50
51
|
sdg_hub/core/utils/yaml_utils.py,sha256=tShCd-FFkp0xlKnLe7dXsMOR4AvT9d2qRUmu4ZnPSEY,1458
|
51
52
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
52
53
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_answers.yaml,sha256=THRT3cY44KGI_69B2wqt2Q89EknnOSE7B4A_jdnxlIU,330
|
@@ -54,14 +55,14 @@ sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/gener
|
|
54
55
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/generate_question_list.yaml,sha256=qHOgUNrQz2vjUjJiEHNGWxDDXwjJlP1kofTxeGgLyPI,1461
|
55
56
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
57
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/detailed_summary.yaml,sha256=Ik6gAml0O-jPq8jpXBAkURzYkQuFOnDZb4LDwjmfAiE,381
|
57
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=
|
58
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/detailed_summary/flow.yaml,sha256=fUdzY9dtU69o99Uq8FIPycgVWdLD-1kbY97Bh-Vo2A0,5538
|
58
59
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
59
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml,sha256=
|
60
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/doc_direct_qa/flow.yaml,sha256=smPWVUZRCt58EagWDmJVmTBQj8qMcjpzh-Q3GSuFrz0,4413
|
60
61
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
61
62
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/extractive_summary.yaml,sha256=SeapWoOx3fhN5SvWYuHss_9prLE8xSkOic7JkbDHSR0,4081
|
62
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=
|
63
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/extractive_summary/flow.yaml,sha256=iNNIfofFE7awK7iivtIFWxjfjy8QviMugOPPnOTySKA,5706
|
63
64
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
64
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=
|
65
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/flow.yaml,sha256=CIUZNYhvszT-jpz1Hvh6nS2y5W34P529ZOMp8thEQ9k,3219
|
65
66
|
sdg_hub/flows/qa_generation/document_grounded_qa/enhanced_multi_summary_qa/key_facts/key_facts_summary.yaml,sha256=YKMX_CuvcThG_bdNCAIXdVBkMvB72I89RGq2ltSSgc8,3298
|
66
67
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
67
68
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -71,24 +72,24 @@ sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/ev
|
|
71
72
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_question.yaml,sha256=zwzklXup6khRkR88avgrJTcjaMcV1wnbeYaML5oPuNs,1767
|
72
73
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/evaluate_relevancy.yaml,sha256=cA8igo7jMrRXaWW6k0of6KOp7YnxLtPj0fP4DbrmZNQ,3647
|
73
74
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/extractive_summary.yaml,sha256=fcMV7LaCFZo4D29nwhGJXqFFuZMYVLo9XYjv8zcU6zs,364
|
74
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=
|
75
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/flow.yaml,sha256=HR8sf7RUZKr8UqKztBj-nlvyrve1UMUu8x8qgYM6O14,9055
|
75
76
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/instructlab/generate_questions_responses.yaml,sha256=yX8aLY8dJSDML9ZJhnj9RzPbN8tH2xfcM4Gc6xZuwqQ,2596
|
76
77
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/README.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
77
78
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
79
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/atomic_facts_ja.yaml,sha256=OjPZaSCOSLxEWgW3pmNwF7mmLhGhFGTmKL_3rKdqeW4,2488
|
79
80
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/detailed_summary_ja.yaml,sha256=nEy_RcotHGiiENrmUANpKkbIFsrARAeSwECrBeHi2so,391
|
80
81
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/extractive_summary_ja.yaml,sha256=V90W0IeJQZTFThA8v0UOs3DtZbtU3BI9jkpChw1BULo,402
|
81
|
-
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml,sha256=
|
82
|
+
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/flow.yaml,sha256=jumjKmKshSd8hoTYpyBJ0nMOADeQmxBmNPY7yfa_xQ8,9171
|
82
83
|
sdg_hub/flows/qa_generation/document_grounded_qa/multi_summary_qa/multilingual/japanese/generate_questions_responses_ja.yaml,sha256=96SQqXG7fmb-50SdX85sgVtrFcQ-oNKe_0BoQdZmY5g,2638
|
83
84
|
sdg_hub/flows/text_analysis/__init__.py,sha256=WStks4eM_KHNTVsHglcj8vFghmI0PH9P1hUrijBLbwc,125
|
84
85
|
sdg_hub/flows/text_analysis/structured_insights/__init__.py,sha256=_DT4NR05JD9CZoSWROPr2lC6se0VjSqQPZJJlEV79mk,274
|
85
86
|
sdg_hub/flows/text_analysis/structured_insights/analyze_sentiment.yaml,sha256=1YGPypFJYS8qfYFj2J6ERTgodKJvMF4YHNGt_vOF5qc,1000
|
86
87
|
sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml,sha256=Q_SDy14Zu-qS2sbKfUBmGlYj3k7CUg6HzzXlFCXRKuU,1169
|
87
88
|
sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml,sha256=_nPPMdHnxag_lYbhYUjGJGo-CvRwWvwdGX7cQhdZ1S0,847
|
88
|
-
sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=
|
89
|
+
sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=BBV18SdvuVTAESjwkJ7V1jbb-cSTBvNl3SCycd0oEQ4,4934
|
89
90
|
sdg_hub/flows/text_analysis/structured_insights/summarize.yaml,sha256=WXwQak1pF8e1OwnOoI1EHu8QB6iUNW89rfkTdi1Oq54,687
|
90
|
-
sdg_hub-0.4.
|
91
|
-
sdg_hub-0.4.
|
92
|
-
sdg_hub-0.4.
|
93
|
-
sdg_hub-0.4.
|
94
|
-
sdg_hub-0.4.
|
91
|
+
sdg_hub-0.4.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
92
|
+
sdg_hub-0.4.2.dist-info/METADATA,sha256=5qbw9_DoVmfntmQlvz4VPdQXdUXoLO8Zhrxbc1uY7b0,9783
|
93
|
+
sdg_hub-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
94
|
+
sdg_hub-0.4.2.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
|
95
|
+
sdg_hub-0.4.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|