crewplus 0.2.74__py3-none-any.whl → 0.2.75__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crewplus might be problematic. Click here for more details.
- crewplus/services/model_load_balancer.py +24 -10
- crewplus/vectorstores/milvus/vdb_service.py +0 -6
- {crewplus-0.2.74.dist-info → crewplus-0.2.75.dist-info}/METADATA +1 -1
- {crewplus-0.2.74.dist-info → crewplus-0.2.75.dist-info}/RECORD +7 -7
- {crewplus-0.2.74.dist-info → crewplus-0.2.75.dist-info}/WHEEL +0 -0
- {crewplus-0.2.74.dist-info → crewplus-0.2.75.dist-info}/entry_points.txt +0 -0
- {crewplus-0.2.74.dist-info → crewplus-0.2.75.dist-info}/licenses/LICENSE +0 -0
|
@@ -68,7 +68,7 @@ class ModelLoadBalancer:
|
|
|
68
68
|
self.logger.error(f"Failed to load model configuration: {e}", exc_info=True)
|
|
69
69
|
raise RuntimeError(f"Failed to load model configuration: {e}")
|
|
70
70
|
|
|
71
|
-
def get_model(self, provider: str = None, model_type: str = None, deployment_name: str = None, with_metadata: bool = False, selection_strategy: str = 'random'):
|
|
71
|
+
def get_model(self, provider: str = None, model_type: str = None, deployment_name: str = None, with_metadata: bool = False, selection_strategy: str = 'random', disable_streaming: bool = False):
|
|
72
72
|
"""
|
|
73
73
|
Get a model instance.
|
|
74
74
|
|
|
@@ -82,6 +82,7 @@ class ModelLoadBalancer:
|
|
|
82
82
|
deployment_name: The unique name for the model deployment.
|
|
83
83
|
with_metadata: If True, returns a tuple of (model, deployment_name).
|
|
84
84
|
selection_strategy: The selection strategy ('random', 'round_robin', or 'least_used'). Defaults to 'random'.
|
|
85
|
+
disable_streaming: If True, get a model instance with streaming disabled.
|
|
85
86
|
|
|
86
87
|
Returns:
|
|
87
88
|
An instantiated language model object, or a tuple if with_metadata is True.
|
|
@@ -97,7 +98,7 @@ class ModelLoadBalancer:
|
|
|
97
98
|
if deployment_name:
|
|
98
99
|
for model_config in self.models_config:
|
|
99
100
|
if model_config.get('deployment_name') == deployment_name:
|
|
100
|
-
model = self._get_or_create_model(model_config)
|
|
101
|
+
model = self._get_or_create_model(model_config, disable_streaming)
|
|
101
102
|
if with_metadata:
|
|
102
103
|
return model, deployment_name
|
|
103
104
|
return model
|
|
@@ -121,7 +122,7 @@ class ModelLoadBalancer:
|
|
|
121
122
|
self.logger.warning(f"Unsupported selection strategy: '{selection_strategy}'. Defaulting to 'random'.")
|
|
122
123
|
selected_model_config = self._random_selection(candidates)
|
|
123
124
|
|
|
124
|
-
model = self._get_or_create_model(selected_model_config)
|
|
125
|
+
model = self._get_or_create_model(selected_model_config, disable_streaming)
|
|
125
126
|
if with_metadata:
|
|
126
127
|
return model, selected_model_config.get('deployment_name')
|
|
127
128
|
return model
|
|
@@ -134,21 +135,25 @@ class ModelLoadBalancer:
|
|
|
134
135
|
self.thread_local.models_cache = {}
|
|
135
136
|
return self.thread_local.models_cache
|
|
136
137
|
|
|
137
|
-
def _get_or_create_model(self, model_config: Dict):
|
|
138
|
+
def _get_or_create_model(self, model_config: Dict, disable_streaming: bool = False):
|
|
138
139
|
"""
|
|
139
140
|
Gets a model instance from the thread-local cache. If it doesn't exist,
|
|
140
141
|
it instantiates, caches, and returns it.
|
|
141
142
|
"""
|
|
142
143
|
model_id = model_config['id']
|
|
144
|
+
cache_key = f"{model_id}"
|
|
145
|
+
if disable_streaming:
|
|
146
|
+
cache_key += "-non-streaming"
|
|
147
|
+
|
|
143
148
|
models_cache = self._get_thread_local_models_cache()
|
|
144
149
|
|
|
145
|
-
if model_id not in models_cache:
|
|
146
|
-
self.logger.debug(f"Creating new model instance for id {model_id} in thread {threading.get_ident()}")
|
|
147
|
-
models_cache[model_id] = self._instantiate_model(model_config)
|
|
150
|
+
if cache_key not in models_cache:
|
|
151
|
+
self.logger.debug(f"Creating new model instance for id {cache_key} in thread {threading.get_ident()}")
|
|
152
|
+
models_cache[cache_key] = self._instantiate_model(model_config, disable_streaming)
|
|
148
153
|
|
|
149
|
-
return models_cache[model_id]
|
|
154
|
+
return models_cache[cache_key]
|
|
150
155
|
|
|
151
|
-
def _instantiate_model(self, model_config: Dict):
|
|
156
|
+
def _instantiate_model(self, model_config: Dict, disable_streaming: bool = False):
|
|
152
157
|
"""Instantiate and return an LLM object based on the model configuration"""
|
|
153
158
|
provider = model_config['provider']
|
|
154
159
|
self.logger.debug(f"Model balancer: instantiating {provider} -- {model_config.get('deployment_name')}")
|
|
@@ -162,8 +167,13 @@ class ModelLoadBalancer:
|
|
|
162
167
|
}
|
|
163
168
|
if 'temperature' in model_config:
|
|
164
169
|
kwargs['temperature'] = model_config['temperature']
|
|
165
|
-
|
|
170
|
+
|
|
171
|
+
# The 'disable_streaming' parameter takes precedence
|
|
172
|
+
if disable_streaming:
|
|
173
|
+
kwargs['disable_streaming'] = True
|
|
174
|
+
elif model_config.get('deployment_name') == 'o1-mini':
|
|
166
175
|
kwargs['disable_streaming'] = True
|
|
176
|
+
|
|
167
177
|
return TracedAzureChatOpenAI(**kwargs)
|
|
168
178
|
elif provider == 'openai':
|
|
169
179
|
kwargs = {
|
|
@@ -197,6 +207,8 @@ class ModelLoadBalancer:
|
|
|
197
207
|
kwargs['temperature'] = model_config['temperature']
|
|
198
208
|
if 'max_tokens' in model_config:
|
|
199
209
|
kwargs['max_tokens'] = model_config['max_tokens']
|
|
210
|
+
if disable_streaming:
|
|
211
|
+
kwargs['disable_streaming'] = True
|
|
200
212
|
return GeminiChatModel(**kwargs)
|
|
201
213
|
elif provider == 'vertex-ai':
|
|
202
214
|
deployment_name = model_config['deployment_name']
|
|
@@ -216,6 +228,8 @@ class ModelLoadBalancer:
|
|
|
216
228
|
kwargs['temperature'] = model_config['temperature']
|
|
217
229
|
if 'max_tokens' in model_config:
|
|
218
230
|
kwargs['max_tokens'] = model_config['max_tokens']
|
|
231
|
+
if disable_streaming:
|
|
232
|
+
kwargs['disable_streaming'] = True
|
|
219
233
|
return GeminiChatModel(**kwargs)
|
|
220
234
|
else:
|
|
221
235
|
self.logger.error(f"Unsupported provider: {provider}")
|
|
@@ -205,12 +205,6 @@ class VDBService(object):
|
|
|
205
205
|
# Return a copy without the default alias, as it will be added specifically for sync/async clients.
|
|
206
206
|
zilliz_args = self.connection_args.copy()
|
|
207
207
|
zilliz_args.pop('alias', None)
|
|
208
|
-
# 增加 gRPC keepalive 选项来加固连接
|
|
209
|
-
zilliz_args['channel_options'] = [
|
|
210
|
-
('grpc.keepalive_time_ms', 60000), # 每 60 秒发送一次 ping
|
|
211
|
-
('grpc.keepalive_timeout_ms', 20000), # 20 秒内没收到 pong 则认为连接断开
|
|
212
|
-
('grpc.enable_http_proxy', 0),
|
|
213
|
-
]
|
|
214
208
|
return zilliz_args
|
|
215
209
|
else:
|
|
216
210
|
self.logger.error(f"Unsupported vector store provider: {provider}")
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
crewplus-0.2.
|
|
2
|
-
crewplus-0.2.74.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
3
|
-
crewplus-0.2.74.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
4
|
-
crewplus-0.2.74.dist-info/licenses/LICENSE,sha256=2_NHSHRTKB_cTcT_GXgcenOCtIZku8j343mOgAguTfc,1087
|
|
1
|
+
crewplus-0.2.75.dist-info/METADATA,sha256=di8TmRsAu9bf9dcx04noDYD1TqdD_UvZR9Tj-kMrAC4,5424
|
|
2
|
+
crewplus-0.2.75.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
3
|
+
crewplus-0.2.75.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
4
|
+
crewplus-0.2.75.dist-info/licenses/LICENSE,sha256=2_NHSHRTKB_cTcT_GXgcenOCtIZku8j343mOgAguTfc,1087
|
|
5
5
|
crewplus/__init__.py,sha256=m46HkZL1Y4toD619NL47Sn2Qe084WFFSFD7e6VoYKZc,284
|
|
6
6
|
crewplus/callbacks/__init__.py,sha256=YG7ieeb91qEjp1zF0-inEN7mjZ7yT_D2yzdWFT8Z1Ws,63
|
|
7
7
|
crewplus/callbacks/async_langfuse_handler.py,sha256=8_p7ctgcmDNQgF5vOqA47I0x-3GWsm7zioZcZHgedZk,7163
|
|
@@ -9,7 +9,7 @@ crewplus/services/__init__.py,sha256=V1CG8b2NOmRzNgQH7BPl4KVxWSYJH5vfEsW1wVErKNE
|
|
|
9
9
|
crewplus/services/azure_chat_model.py,sha256=iWzJ2GQFSNmwJx-2O5_xKPSB6VVc-7T6bcfFI8_WezA,5521
|
|
10
10
|
crewplus/services/gemini_chat_model.py,sha256=DYqz01H2TIHiCDQesSozVfOsMigno6QGwOtIweg7UHk,40103
|
|
11
11
|
crewplus/services/init_services.py,sha256=tc1ti8Yufo2ixlJpwg8uH0KmoyQ4EqxCOe4uTEWnlRM,2413
|
|
12
|
-
crewplus/services/model_load_balancer.py,sha256=
|
|
12
|
+
crewplus/services/model_load_balancer.py,sha256=HIx-k-FiizJSF4e88SFxfFVNS93vJR2zrOdU_fg26FU,12826
|
|
13
13
|
crewplus/services/tracing_manager.py,sha256=pwNFeA77vnoZMh_AUOnK5TvAaPOOLg5oDnVOe1yUa9A,8502
|
|
14
14
|
crewplus/utils/__init__.py,sha256=2Gk1n5srFJQnFfBuYTxktdtKOVZyNrFcNaZKhXk35Pw,142
|
|
15
15
|
crewplus/utils/schema_action.py,sha256=GDaBoVFQD1rXqrLVSMTfXYW1xcUu7eDcHsn57XBSnIg,422
|
|
@@ -18,9 +18,9 @@ crewplus/utils/tracing_util.py,sha256=ew5VwjTKcY88P2sveIlGqmsNFR5OJ-DjKAHKQzBoTy
|
|
|
18
18
|
crewplus/vectorstores/milvus/__init__.py,sha256=OeYv2rdyG7tcREIjBJPyt2TbE54NvyeRoWMe7LwopRE,245
|
|
19
19
|
crewplus/vectorstores/milvus/milvus_schema_manager.py,sha256=-QRav-hzu-XWeJ_yKUMolal_EyMUspSg-nvh5sqlrlQ,11442
|
|
20
20
|
crewplus/vectorstores/milvus/schema_milvus.py,sha256=wwNpfqsKS0xeozZES40IvB0iNwUtpCall_7Hkg0dL1g,27223
|
|
21
|
-
crewplus/vectorstores/milvus/vdb_service.py,sha256=
|
|
21
|
+
crewplus/vectorstores/milvus/vdb_service.py,sha256=U8I6IUYZK0gCe1R9rTnVezvZfEcUS9UEbKEoeJPX8kY,37528
|
|
22
22
|
docs/GeminiChatModel.md,sha256=zZYyl6RmjZTUsKxxMiC9O4yV70MC4TD-IGUmWhIDBKA,8677
|
|
23
23
|
docs/ModelLoadBalancer.md,sha256=aGHES1dcXPz4c7Y8kB5-vsCNJjriH2SWmjBkSGoYKiI,4398
|
|
24
24
|
docs/VDBService.md,sha256=Dw286Rrf_fsi13jyD3Bo4Sy7nZ_G7tYm7d8MZ2j9hxk,9375
|
|
25
25
|
docs/index.md,sha256=3tlc15uR8lzFNM5WjdoZLw0Y9o1P1gwgbEnOdIBspqc,1643
|
|
26
|
-
crewplus-0.2.74.dist-info/RECORD,,
|
|
26
|
+
crewplus-0.2.75.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|