sdg-hub 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdg_hub/_version.py +2 -2
- sdg_hub/core/blocks/llm/client_manager.py +26 -1
- sdg_hub/core/utils/datautils.py +40 -22
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.3.1.dist-info}/METADATA +1 -1
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.3.1.dist-info}/RECORD +8 -8
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.3.1.dist-info}/WHEEL +0 -0
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {sdg_hub-0.3.0.dist-info → sdg_hub-0.3.1.dist-info}/top_level.txt +0 -0
sdg_hub/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '0.3.0'
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 0)
|
31
|
+
__version__ = version = '0.3.1'
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 1)
|
33
33
|
|
34
34
|
__commit_id__ = commit_id = None
|
@@ -214,8 +214,33 @@ class LLMClientManager:
|
|
214
214
|
messages_list = messages
|
215
215
|
|
216
216
|
if max_concurrency is not None:
|
217
|
+
if max_concurrency < 1:
|
218
|
+
raise ValueError(
|
219
|
+
"max_concurrency must be greater than 0, got {max_concurrency}"
|
220
|
+
)
|
221
|
+
# Adjust concurrency based on n parameter to avoid overwhelming API
|
222
|
+
# when n > 1 (multiple completions per request)
|
223
|
+
n_value = overrides.get("n") or self.config.n or 1
|
224
|
+
if n_value > 1:
|
225
|
+
# Warn if max_concurrency is less than n
|
226
|
+
if max_concurrency < n_value:
|
227
|
+
logger.warning(
|
228
|
+
f"max_concurrency ({max_concurrency}) is less than n ({n_value}). "
|
229
|
+
f"This may result in very low concurrency. Consider increasing max_concurrency "
|
230
|
+
f"or reducing n for better performance."
|
231
|
+
)
|
232
|
+
|
233
|
+
# Reduce concurrency when generating multiple completions per request
|
234
|
+
adjusted_concurrency = max(1, max_concurrency // n_value)
|
235
|
+
logger.debug(
|
236
|
+
f"Adjusted max_concurrency from {max_concurrency} to {adjusted_concurrency} "
|
237
|
+
f"for n={n_value} completions per request"
|
238
|
+
)
|
239
|
+
else:
|
240
|
+
adjusted_concurrency = max_concurrency
|
241
|
+
|
217
242
|
# Use semaphore for concurrency control
|
218
|
-
semaphore = asyncio.Semaphore(max_concurrency)
|
243
|
+
semaphore = asyncio.Semaphore(adjusted_concurrency)
|
219
244
|
|
220
245
|
async def _create_with_semaphore(msgs):
|
221
246
|
async with semaphore:
|
sdg_hub/core/utils/datautils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# Third Party
|
2
2
|
from datasets import Dataset, concatenate_datasets
|
3
|
+
import numpy as np
|
3
4
|
|
4
5
|
# Local
|
5
6
|
from .error_handling import FlowValidationError
|
@@ -39,28 +40,45 @@ def validate_no_duplicates(dataset: Dataset) -> None:
|
|
39
40
|
|
40
41
|
df = dataset.to_pandas()
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
43
|
+
def is_hashable(x):
|
44
|
+
try:
|
45
|
+
hash(x)
|
46
|
+
return True
|
47
|
+
except TypeError:
|
48
|
+
return False
|
49
|
+
|
50
|
+
def make_hashable(x):
|
51
|
+
if is_hashable(x):
|
52
|
+
# int, float, str, bytes, None etc. are already hashable
|
53
|
+
return x
|
54
|
+
if isinstance(x, np.ndarray):
|
55
|
+
if x.ndim == 0:
|
56
|
+
return make_hashable(x.item())
|
57
|
+
return tuple(make_hashable(i) for i in x)
|
58
|
+
if isinstance(x, dict):
|
59
|
+
# sort robustly even with heterogeneous key types
|
60
|
+
return tuple(
|
61
|
+
sorted(
|
62
|
+
((k, make_hashable(v)) for k, v in x.items()),
|
63
|
+
key=lambda kv: repr(kv[0]),
|
64
|
+
)
|
65
|
+
)
|
66
|
+
if isinstance(x, (set, frozenset)):
|
67
|
+
# order‑insensitive
|
68
|
+
return frozenset(make_hashable(i) for i in x)
|
69
|
+
if hasattr(x, "__iter__"):
|
70
|
+
# lists, tuples, custom iterables
|
71
|
+
return tuple(make_hashable(i) for i in x)
|
72
|
+
# last‑resort fallback to a stable representation
|
73
|
+
return repr(x)
|
74
|
+
|
75
|
+
# Apply to the whole dataframe to ensure every cell is hashable
|
76
|
+
if hasattr(df, "map"):
|
77
|
+
df = df.map(make_hashable)
|
78
|
+
else:
|
79
|
+
df = df.applymap(make_hashable)
|
80
|
+
|
81
|
+
duplicate_count = int(df.duplicated(keep="first").sum())
|
64
82
|
if duplicate_count > 0:
|
65
83
|
raise FlowValidationError(
|
66
84
|
f"Input dataset contains {duplicate_count} duplicate rows. "
|
@@ -1,5 +1,5 @@
|
|
1
1
|
sdg_hub/__init__.py,sha256=Tw-6R5a8_W1kJcTAsW3R9ltBDP1dy5-fe7Tvt3cSyCQ,550
|
2
|
-
sdg_hub/_version.py,sha256=
|
2
|
+
sdg_hub/_version.py,sha256=gGLpQUQx-ty9SEy9PYw9OgJWWzJLBnCpfJOfzL7SjlI,704
|
3
3
|
sdg_hub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
sdg_hub/core/__init__.py,sha256=NwqB4fwhC29W50VW7QXZssLxx122YvgO9LHDLdgAnrI,496
|
5
5
|
sdg_hub/core/blocks/__init__.py,sha256=9sCkCvDQzJGSedaePVlEIpbNwrkBz_K500VW_6FLhuE,1601
|
@@ -22,7 +22,7 @@ sdg_hub/core/blocks/evaluation/verify_question_block.py,sha256=LKoIHdxUuTVO24n_M
|
|
22
22
|
sdg_hub/core/blocks/filtering/__init__.py,sha256=isxSVSvDqkMjG8dQSl3Q2M4g5c1t9fTjBSA21icf-yA,275
|
23
23
|
sdg_hub/core/blocks/filtering/column_value_filter.py,sha256=H8Gif0q9Wc_d1TnVow8Zpsg7blJOFGN1EZmV6OPpkcg,5971
|
24
24
|
sdg_hub/core/blocks/llm/__init__.py,sha256=N6-Prgd4X85oWbMQzhYMrq7OX-NTJm57cghowK-val0,844
|
25
|
-
sdg_hub/core/blocks/llm/client_manager.py,sha256=
|
25
|
+
sdg_hub/core/blocks/llm/client_manager.py,sha256=6RNqYvFIh4SF6jopI6tTY5MA01y8Qo-tAhsE0GeHZZ0,16109
|
26
26
|
sdg_hub/core/blocks/llm/config.py,sha256=gc4xp5D20MSlKMFEos0QAaKUwgbZpBtMGXmn6LsIk78,11289
|
27
27
|
sdg_hub/core/blocks/llm/error_handler.py,sha256=7T-019ZFB9qgZoX1ybIiXyaLjPzrF96qcKmUu6vmO6g,12178
|
28
28
|
sdg_hub/core/blocks/llm/llm_chat_block.py,sha256=9ytjxjADM0FydkLapZPSQPfzjjrFIdFONs3EJEoKnaw,23007
|
@@ -45,7 +45,7 @@ sdg_hub/core/flow/migration.py,sha256=6and-RBqV0t2gRipr1GiOOVnyBJdtyyjw1kO08Z--d
|
|
45
45
|
sdg_hub/core/flow/registry.py,sha256=DzCqEEgwhvwnCBAGLogoMVdwXh4pCHrxOWqoxam7O8I,12162
|
46
46
|
sdg_hub/core/flow/validation.py,sha256=pUJvgaUjLpKNwvW6djcqVOF-HShOjegEmGOnUnoX4BA,9722
|
47
47
|
sdg_hub/core/utils/__init__.py,sha256=C2FzLn3dHprwGJDEgI4fyFS3aoCJR-9PhHsunxropJ8,351
|
48
|
-
sdg_hub/core/utils/datautils.py,sha256=
|
48
|
+
sdg_hub/core/utils/datautils.py,sha256=__HkUe1DxcJVHKrFX68z_hDXwxJygBlJDfjJLnj7rHc,4230
|
49
49
|
sdg_hub/core/utils/error_handling.py,sha256=yku8cGj_nKCyXDsnb-mHCpgukkkAMucJ4iAUrIzqysc,5510
|
50
50
|
sdg_hub/core/utils/flow_id_words.yaml,sha256=5QHpQdP7zwahRuooyAlJIwBY7WcDR7vtbJXxVJqujbg,2317
|
51
51
|
sdg_hub/core/utils/flow_identifier.py,sha256=aAHfK_G9AwEtMglLRMdMpi_AI1dciub5UqBGm4yb2HE,2841
|
@@ -83,8 +83,8 @@ sdg_hub/flows/text_analysis/structured_insights/extract_entities.yaml,sha256=Q_S
|
|
83
83
|
sdg_hub/flows/text_analysis/structured_insights/extract_keywords.yaml,sha256=_nPPMdHnxag_lYbhYUjGJGo-CvRwWvwdGX7cQhdZ1S0,847
|
84
84
|
sdg_hub/flows/text_analysis/structured_insights/flow.yaml,sha256=2HuGTyNwYe6a8Ev-QdKZXwe29NL4wOkq4ecEV9a7NDg,4221
|
85
85
|
sdg_hub/flows/text_analysis/structured_insights/summarize.yaml,sha256=WXwQak1pF8e1OwnOoI1EHu8QB6iUNW89rfkTdi1Oq54,687
|
86
|
-
sdg_hub-0.3.
|
87
|
-
sdg_hub-0.3.
|
88
|
-
sdg_hub-0.3.
|
89
|
-
sdg_hub-0.3.
|
90
|
-
sdg_hub-0.3.
|
86
|
+
sdg_hub-0.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
87
|
+
sdg_hub-0.3.1.dist-info/METADATA,sha256=-dPDzTaPfnMb_n6p7Jcvkqv3Y-Ihi76psItQL7DQBX8,9735
|
88
|
+
sdg_hub-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
89
|
+
sdg_hub-0.3.1.dist-info/top_level.txt,sha256=TqI7d-HE1n6zkXFkU0nF3A1Ct0P0pBaqI675uFokhx4,8
|
90
|
+
sdg_hub-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|