MindsDB 25.7.3.0__py3-none-any.whl → 25.8.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +11 -1
- mindsdb/api/a2a/common/server/server.py +16 -6
- mindsdb/api/executor/command_executor.py +215 -150
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
- mindsdb/api/executor/planner/plan_join.py +3 -0
- mindsdb/api/executor/planner/plan_join_ts.py +117 -100
- mindsdb/api/executor/planner/query_planner.py +1 -0
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +18 -44
- mindsdb/api/http/namespaces/agents.py +23 -20
- mindsdb/api/http/namespaces/chatbots.py +83 -120
- mindsdb/api/http/namespaces/file.py +1 -1
- mindsdb/api/http/namespaces/jobs.py +38 -60
- mindsdb/api/http/namespaces/tree.py +69 -61
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/api/mcp/start.py +2 -0
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
- mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
- mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -77
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +5 -2
- mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
- mindsdb/integrations/handlers/salesforce_handler/constants.py +215 -0
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +141 -80
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +0 -1
- mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
- mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/handler_utils.py +32 -12
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +246 -149
- mindsdb/interfaces/agents/constants.py +0 -1
- mindsdb/interfaces/agents/langchain_agent.py +11 -6
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +4 -4
- mindsdb/interfaces/database/database.py +38 -13
- mindsdb/interfaces/database/integrations.py +20 -5
- mindsdb/interfaces/database/projects.py +174 -23
- mindsdb/interfaces/database/views.py +86 -60
- mindsdb/interfaces/jobs/jobs_controller.py +103 -110
- mindsdb/interfaces/knowledge_base/controller.py +33 -6
- mindsdb/interfaces/knowledge_base/evaluate.py +2 -1
- mindsdb/interfaces/knowledge_base/executor.py +24 -0
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +111 -145
- mindsdb/interfaces/skills/skills_controller.py +18 -6
- mindsdb/interfaces/storage/db.py +40 -6
- mindsdb/interfaces/variables/variables_controller.py +8 -15
- mindsdb/utilities/config.py +5 -3
- mindsdb/utilities/fs.py +54 -17
- mindsdb/utilities/functions.py +72 -60
- mindsdb/utilities/log.py +38 -6
- mindsdb/utilities/ps.py +7 -7
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/METADATA +282 -268
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/RECORD +94 -92
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/top_level.txt +0 -0

mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py
DELETED

@@ -1,290 +0,0 @@
-import os
-import json
-import pandas as pd
-from typing import Text, Optional, Dict
-
-from mindsdb.integrations.utilities.handler_utils import get_api_key
-from mindsdb.integrations.libs.llm.utils import ft_jsonl_validation, ft_formatter
-from mindsdb.integrations.handlers.openai_handler.openai_handler import OpenAIHandler
-from mindsdb.integrations.handlers.anyscale_endpoints_handler.settings import anyscale_handler_config
-from mindsdb.utilities import log
-
-logger = log.getLogger(__name__)
-
-
-class AnyscaleEndpointsHandler(OpenAIHandler):
-    """
-    This handler handles connection and inference with the Anyscale Endpoints API.
-    """
-
-    name = 'anyscale_endpoints'
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.all_models = []
-        self.chat_completion_models = []
-        self.supported_ft_models = []
-        self.default_model = anyscale_handler_config.DEFAULT_MODEL
-        self.api_base = anyscale_handler_config.ANYSCALE_API_BASE
-        self.default_mode = anyscale_handler_config.DEFAULT_MODE
-        self.supported_modes = anyscale_handler_config.SUPPORTED_MODES
-        self.rate_limit = anyscale_handler_config.RATE_LIMIT
-        self.max_batch_size = anyscale_handler_config.MAX_BATCH_SIZE
-        self.default_max_tokens = anyscale_handler_config.DEFAULT_MAX_TOKENS
-
-    def create_engine(self, connection_args: Dict) -> None:
-        """
-        Validate the Anyscale Endpoints credentials on engine creation.
-
-        Args:
-            connection_args (Dict): Connection arguments.
-
-        Raises:
-            Exception: If the handler is not configured with valid API credentials.
-
-        Returns:
-            None
-        """
-
-        connection_args = {k.lower(): v for k, v in connection_args.items()}
-        api_key = connection_args.get('anyscale_endpoints_api_key')
-        if api_key is not None:
-            org = connection_args.get('api_organization')
-            api_base = connection_args.get('api_base') or os.environ.get('ANYSCALE_API_BASE', anyscale_handler_config.ANYSCALE_API_BASE)
-            client = self._get_client(api_key=api_key, base_url=api_base, org=org)
-            OpenAIHandler._check_client_connection(client)
-
-    @staticmethod
-    def create_validation(target: Text, args: Optional[Dict] = None, **kwargs: Optional[Dict]) -> None:
-        """
-        Validate the Anyscale Endpoints credentials on model creation.
-
-        Args:
-            target (Text): Target column, not required for LLMs.
-            args (Dict): Handler arguments.
-            kwargs (Dict): Handler keyword arguments.
-
-        Raises:
-            Exception: If the handler is not configured with valid API credentials.
-
-        Returns:
-            None
-        """
-
-        if 'using' not in args:
-            raise Exception(
-                "Anyscale Endpoints engine requires a USING clause! Refer to its documentation for more details."
-            )
-        else:
-            args = args['using']
-
-        engine_storage = kwargs['handler_storage']
-        connection_args = engine_storage.get_connection_args()
-        api_key = get_api_key('anyscale_endpoints', args, engine_storage=engine_storage)
-        api_base = connection_args.get('api_base') or args.get('api_base') or os.environ.get('ANYSCALE_API_BASE', anyscale_handler_config.ANYSCALE_API_BASE)
-
-        client = OpenAIHandler._get_client(api_key=api_key, base_url=api_base)
-        OpenAIHandler._check_client_connection(client)
-
-    def create(self, target: Text, args: Optional[Dict] = None, **kwargs: Optional[Dict]) -> None:
-        """
-        Create a model via an engine.
-
-        Args:
-            target (Text): Target column.
-            args (Dict): Model arguments.
-            kwargs (Dict): Other arguments.
-
-        Returns:
-            None
-        """
-
-        # Set the base and fine-tuned models and call the parent method
-        self._set_models_from_args(args)
-        super().create(target, args, **kwargs)
-
-    def predict(self, df: pd.DataFrame, args: Optional[Dict] = None) -> pd.DataFrame:
-        """
-        Make a prediction using a model.
-
-        Args:
-            df (pd.DataFrame): Input data.
-            args (Dict): Handler arguments.
-
-        Returns:
-            pd.DataFrame: Predicted data
-        """
-
-        # Set the base and fine-tuned models and call the parent method
-        self._set_models_from_args(args)
-        return super().predict(df, args)
-
-    def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
-        """
-        Fine-tune a supported model.
-
-        Args:
-            df (pd.DataFrame): Input data.
-            args (Dict): Handler arguments.
-
-        Returns:
-            None
-        """
-
-        using_args = args.get('using', {})
-        self._set_models(using_args)
-        super().finetune(df, args)
-
-        # Rewrite chat_completion_models to include the newly fine-tuned model
-        args = self.model_storage.json_get('args')
-        args['chat_completion_models'] = list(self.chat_completion_models) + [args['model_name']]
-        self.model_storage.json_set('args', args)
-
-    def describe(self, attribute: Optional[Text] = None) -> pd.DataFrame:
-        """
-        Describe a model or its metadata.
-
-        Args:
-            attribute (Text): Attribute to describe.
-
-        Returns:
-            pd.DataFrame: Model or metadata description.
-        """
-
-        args = self.model_storage.json_get('args')
-
-        # Remove keys from args to display
-        for arg in ('api_key', 'openai_api_key'):
-            if arg in args:
-                del args[arg]
-
-        if attribute == 'args':
-            return pd.DataFrame(args.items(), columns=['key', 'value'])
-        elif attribute == 'metadata':
-            # The URL is used as some models require completing a form to access their artifacts
-            model_name = args.get('model_name', self.default_model)
-            model_card_url = 'https://huggingface.co/' + model_name
-            return pd.DataFrame({'model_name': [model_name], 'model_card': [model_card_url]})
-        else:
-            tables = ['args', 'metadata']
-            return pd.DataFrame(tables, columns=['tables'])
-
-    def _set_models(self, args: Dict) -> None:
-        """
-        Set the base and fine-tuned models.
-
-        Args:
-            args (Dict): Model arguments.
-
-        Returns:
-            None
-        """
-
-        api_key = get_api_key('anyscale_endpoints', args, self.engine_storage)
-        client = OpenAIHandler._get_client(api_key=api_key, base_url=self.api_base)
-        self.all_models = [m.id for m in client.models.list()]
-        self.chat_completion_models = [m.id for m in client.models.list() if m.rayllm_metadata['engine_config']['model_type'] == 'text-generation']  # noqa
-        # Set base models compatible with fine-tuning
-        self.supported_ft_models = self.chat_completion_models
-
-    def _set_models_from_args(self, args: Dict) -> None:
-        """
-        Set the base and fine-tuned models from the arguments, if specified. Otherwise, use the default list.
-
-        Args:
-            args (Dict): Model arguments.
-
-        Returns:
-            None
-        """
-
-        self._set_models(args.get('using', {}))
-
-        # Update the models if they are specified in the arguments
-        model_args = self.model_storage.json_get('args')
-        if model_args and 'chat_completion_models' in model_args:
-            self.chat_completion_models = model_args.get('chat_completion_models')
-
-    @staticmethod
-    def _prepare_ft_jsonl(df, temp_storage_path: Text, temp_filename: Text, _, test_size: Optional[float] = 0.2) -> Dict:
-        """
-        Prepare the data for fine-tuning.
-
-        Args:
-            df (pd.DataFrame): Input data.
-            temp_storage_path (Text): Temporary storage path.
-            temp_filename (Text): Temporary filename.
-            _: Unused.
-            test_size (float): Test size.
-
-        Returns:
-            dict: File names mapped to the prepared data.
-        """
-
-        # 1. Format data
-        chats = ft_formatter(df)
-
-        # 2. Split chats in training and validation subsets
-        series = pd.Series(chats)
-        if len(series) < anyscale_handler_config.MIN_FT_DATASET_LEN:
-            raise Exception(f"Dataset is too small to finetune. Please include at least {anyscale_handler_config.MIN_FT_DATASET_LEN} samples (complete chats).")
-        val_size = max(anyscale_handler_config.MIN_FT_VAL_LEN, int(len(series) * test_size))  # at least as many samples as required by Anyscale
-        train = series.iloc[:-val_size]
-        val = series.iloc[-val_size:]
-
-        # 3. Write as JSONL files
-        file_names = {
-            'train': f'{temp_filename}_prepared_train.jsonl',
-            'val': f'{temp_filename}_prepared_valid.jsonl',
-        }
-        train.to_json(os.path.join(temp_storage_path, file_names['train']), orient='records', lines=True)
-        val.to_json(os.path.join(temp_storage_path, file_names['val']), orient='records', lines=True)
-
-        # 4. Validate and return
-        with open(os.path.join(temp_storage_path, file_names['train']), 'r', encoding='utf-8') as f:
-            ft_jsonl_validation([json.loads(line) for line in f])
-
-        with open(os.path.join(temp_storage_path, file_names['val']), 'r', encoding='utf-8') as f:
-            ft_jsonl_validation([json.loads(line) for line in f])
-
-        return file_names
-
-    def _get_ft_model_type(self, model_name: Text) -> Text:
-        """
-        Get the fine-tuning model type.
-
-        Args:
-            model_name (Text): Model name.
-
-        Returns:
-            Text: Model type.
-        """
-
-        for base_model in self.chat_completion_models:
-            if base_model.lower() in model_name.lower():
-                return base_model
-        logger.warning(f'Cannot recognize model {model_name}. Finetuning may fail.')
-        return model_name.lower()
-
-    @staticmethod
-    def _add_extra_ft_params(ft_params: Dict, using_args: Dict) -> Dict:
-        """
-        Add extra fine-tuning parameters.
-
-        Args:
-            ft_params (Dict): Fine-tuning parameters.
-            using_args (Dict): Model arguments.
-
-        Returns:
-            Dict: Fine-tuning parameters with extra parameters.
-        """
-
-        hyperparameters = {}
-        # Populate separately because keys with `None` break the API
-        for key in ('n_epochs', 'context_length'):
-            if using_args.get(key, None):
-                hyperparameters[key] = using_args[key]
-        if hyperparameters:
-            return {**ft_params, **{'hyperparameters': hyperparameters}}
-        else:
-            return ft_params

mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py
DELETED

@@ -1,14 +0,0 @@
-from collections import OrderedDict
-
-from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
-
-
-creation_args = OrderedDict(
-    anyscale_endpoints_api_key={
-        'type': ARG_TYPE.STR,
-        'description': 'Key for anyscale endpoints.',
-        'required': False,
-        'label': 'Anyscale endpoints API key',
-        'secret': True
-    }
-)

mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg
DELETED

@@ -1,4 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40" fill="none">
-<path d="M13.3722 21.826H4.85157C2.13017 21.826 0 23.9511 0 26.6661V35.1627C0 37.8752 2.13017 40.0003 4.85157 40.0003H13.3722C16.0949 40.0003 18.2251 37.8752 18.2251 35.1627V26.6636C18.2251 23.9498 15.9756 21.826 13.3722 21.826ZM15.2664 35.0427C15.2664 36.1052 14.438 36.9302 13.3722 36.9302H4.85157C4.60193 36.9332 4.35421 36.8865 4.12301 36.7926C3.89181 36.6988 3.68182 36.5599 3.5054 36.3839C3.32898 36.208 3.18972 35.9987 3.09582 35.7684C3.00191 35.538 2.95527 35.2912 2.95864 35.0427V26.5474C2.95864 25.4849 3.78711 24.6598 4.85157 24.6598H13.3722C14.438 24.6598 15.2664 25.4849 15.2664 26.5474V35.0427Z" fill="#2055BE"/>
-<path d="M35.2664 14.9855H25.0888V4.83765C24.9696 2.12507 22.8406 0 20.118 0H4.85157C2.13017 0 0 2.12507 0 4.83765V13.0979C0 15.8117 2.13017 17.9356 4.85157 17.9356H22.0109V35.0448C22.0109 37.7587 24.1423 39.8825 26.8637 39.8825H35.1484C37.8698 39.8825 40 37.7587 40 35.0448V19.8231C40 17.228 37.8698 15.1042 35.2664 14.9855ZM4.85157 14.9855C4.60193 14.9885 4.35421 14.9417 4.12301 14.8479C3.89181 14.7541 3.68182 14.6151 3.5054 14.4392C3.32898 14.2633 3.18972 14.054 3.09582 13.8236C3.00191 13.5933 2.95527 13.3465 2.95864 13.0979V4.83765C2.95864 3.77512 3.78711 2.95009 4.85157 2.95009H20.118C21.1837 2.95009 22.0122 3.77512 22.0122 4.83765V14.9855H4.85157ZM37.1594 35.0448C37.1594 36.1074 36.3309 36.9324 35.2664 36.9324H26.9817C26.7321 36.9354 26.4844 36.8886 26.2532 36.7948C26.022 36.701 25.812 36.562 25.6356 36.3861C25.4592 36.2102 25.3199 36.0009 25.226 35.7705C25.1321 35.5402 25.0854 35.2934 25.0888 35.0448V17.9356H35.2664C36.3309 17.9356 37.1594 18.7606 37.1594 19.8231V35.0448Z" fill="#2055BE"/>
-</svg>

mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py
DELETED

@@ -1,51 +0,0 @@
-from typing import Text, List
-from pydantic_settings import BaseSettings
-
-
-class AnyscaleHandlerConfig(BaseSettings):
-    """
-    Configuration for the Anyscale handler.
-
-    Attributes
-    ----------
-
-    ANYSCALE_API_BASE : Text
-        Base URL for the Anyscale API.
-
-    MIN_FT_VAL_LEN : int
-        Minimum number of validation chats required for fine-tuning.
-
-    MIN_FT_DATASET_LEN : int
-        Minimum number of training and validation chats required for fine-tuning.
-
-    DEFAULT_MODEL : Text
-        Default model to use for models.
-
-    DEFAULT_MODE : Text
-        Default mode to use for models. Can be 'default', 'conversational', or 'conversational-full'.
-
-    SUPPORTED_MODES : List
-        List of supported modes. Can be 'default', 'conversational', or 'conversational-full'.
-
-    RATE_LIMIT : int
-        Number of requests per minute.
-
-    MAX_BATCH_SIZE : int
-        Maximum batch size for requests.
-
-    DEFAULT_MAX_TOKENS : int
-        Default maximum tokens for requests.
-    """
-
-    ANYSCALE_API_BASE: Text = 'https://api.endpoints.anyscale.com/v1'
-    MIN_FT_VAL_LEN: int = 20
-    MIN_FT_DATASET_LEN: int = MIN_FT_VAL_LEN * 2
-    DEFAULT_MODEL: Text = 'meta-llama/Llama-2-7b-chat-hf'
-    DEFAULT_MODE: Text = 'default'
-    SUPPORTED_MODES: List = ['default', 'conversational', 'conversational-full']
-    RATE_LIMIT: int = 25
-    MAX_BATCH_SIZE: int = 20
-    DEFAULT_MAX_TOKENS: int = 100
-
-
-anyscale_handler_config = AnyscaleHandlerConfig()
mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py
DELETED

@@ -1,212 +0,0 @@
-import os
-import pytest
-import pandas as pd
-from unittest.mock import patch
-
-from tests.unit.ml_handlers.base_ml_test import BaseMLAPITest
-
-
-@pytest.mark.skipif(os.environ.get('MDB_TEST_ANYSCALE_ENDPOINTS_API_KEY') is None, reason='Missing API key!')
-class TestAnyscaleEndpoints(BaseMLAPITest):
-    """
-    Integration tests for Anyscale Endpoints AI engine.
-    """
-
-    def setup_method(self):
-        """
-        Setup test environment by creating a project and a Anyscale Endpoints engine.
-        """
-
-        super().setup_method()
-        self.run_sql("CREATE DATABASE proj")
-        self.run_sql(
-            f"""
-            CREATE ML_ENGINE anyscale_endpoints_engine
-            FROM anyscale_endpoints
-            USING
-            anyscale_endpoints_api_key = '{self.get_api_key('MDB_TEST_ANYSCALE_ENDPOINTS_API_KEY')}';
-            """
-        )
-
-    def test_create_model_raises_exception_with_invalid_model_parameter(self):
-        """
-        Test for invalid parameter during model creation.
-        """
-
-        self.run_sql(
-            """
-            CREATE MODEL proj.test_anyscale_invalid_parameter_model
-            PREDICT answer
-            USING
-            engine='anyscale_endpoints_engine',
-            model_name='this-model-does-not-exist',
-            prompt_template='dummy_prompt_template';
-            """
-        )
-        with pytest.raises(Exception):
-            self.wait_predictor("proj", "test_anyscale_invalid_parameter_model")
-
-    # TODO: Uncomment this once the handler is updated to handle unknown arguments.
-    # def test_create_model_raises_exception_with_unknown_model_argument(self):
-    #     """
-    #     Test for unknown argument during model creation.
-    #     """
-
-    #     self.run_sql(
-    #         """
-    #         CREATE MODEL proj.test_anyscale_unknown_argument_model
-    #         PREDICT answer
-    #         USING
-    #         engine='anyscale_endpoints_engine',
-    #         prompt_template='dummy_prompt_template',
-    #         evidently_wrong_argument='wrong value';
-    #         """
-    #     )
-    #     with pytest.raises(Exception):
-    #         self.wait_predictor("proj", "test_anyscale_unknown_argument_model")
-
-    def test_create_model_raises_exception_with_invalid_operation_mode(self):
-        """
-        Test for invalid operation mode during model creation.
-        """
-
-        self.run_sql(
-            """
-            CREATE MODEL proj.test_anyscale_invalid_operation_mode
-            PREDICT answer
-            USING
-            engine='anyscale_endpoints_engine',
-            prompt_template='dummy_prompt_template',
-            mode='invalid_mode';
-            """
-        )
-        with pytest.raises(Exception):
-            self.wait_predictor("proj", "test_anyscale_invalid_operation_mode")
-
-    def test_select_runs_no_errors_on_completion_sentiment_analysis_single(self):
-        """
-        Test for a valid response to a sentiment analysis task (completion).
-        """
-
-        self.run_sql(
-            """
-            CREATE MODEL proj.test_anyscale_single_sa
-            PREDICT sentiment
-            USING
-            engine='anyscale_endpoints_engine',
-            model_name = 'mistralai/Mistral-7B-Instruct-v0.1',
-            prompt_template = 'Classify the sentiment of the following text as one of `positive`, `neutral` or `negative`: {{text}}';
-            """
-        )
-        self.wait_predictor("proj", "test_anyscale_single_sa")
-
-        result_df = self.run_sql(
-            """
-            SELECT sentiment
-            FROM proj.test_anyscale_single_sa
-            WHERE text = 'I love machine learning!';
-            """
-        )
-
-        assert "positive" in result_df["sentiment"].iloc[0].lower()
-
-    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
-    def test_select_runs_no_errors_on_completion_sentiment_analysis_bulk(self, mock_postgres_handler):
-        """
-        Test for valid reponses to bulk questions in a sentiment analysis task (completion).
-        """
-
-        df = pd.DataFrame.from_dict({"text": [
-            "I love machine learning!",
-            "I hate slow internet connections!"
-        ]})
-        self.set_handler(mock_postgres_handler, name="pg", tables={"df": df})
-
-        self.run_sql(
-            """
-            CREATE MODEL proj.test_anyscale_bulk_sa
-            PREDICT sentiment
-            USING
-            engine='anyscale_endpoints_engine',
-            model_name = 'mistralai/Mistral-7B-Instruct-v0.1',
-            prompt_template = 'Classify the sentiment of the following text as one of `positive`, `neutral` or `negative`: {{text}}';
-            """
-        )
-        self.wait_predictor("proj", "test_anyscale_bulk_sa")
-
-        result_df = self.run_sql(
-            """
-            SELECT p.sentiment
-            FROM pg.df as t
-            JOIN proj.test_anyscale_bulk_sa as p;
-            """
-        )
-
-        assert "positive" in result_df["sentiment"].iloc[0].lower()
-        assert "negative" in result_df["sentiment"].iloc[1].lower()
-
-    def test_select_runs_no_errors_on_chat_completion_question_answering_single(self):
-        """
-        Test for a valid response to a question answering task (chat completion).
-        """
-
-        self.run_sql(
-            """
-            CREATE MODEL proj.test_anyscale_single_qa
-            PREDICT answer
-            USING
-            engine='anyscale_endpoints_engine',
-            model_name = 'mistralai/Mistral-7B-Instruct-v0.1',
-            question_column='question';
-            """
-        )
-        self.wait_predictor("proj", "test_anyscale_single_qa")
-
-        result_df = self.run_sql(
-            """
-            SELECT answer
-            FROM proj.test_anyscale_single_qa
-            WHERE question = 'What is the capital of Sweden?';
-            """
-        )
-
-        assert "stockholm" in result_df["answer"].iloc[0].lower()
-
-    @patch("mindsdb.integrations.handlers.postgres_handler.Handler")
-    def test_select_runs_no_errors_on_chat_completion_question_answering_bulk(self, mock_postgres_handler):
-        """
-        Test for valid reponses to bulk questions in a question answering task (chat completion).
-        """
-
-        df = pd.DataFrame.from_dict({"question": [
-            "What is the capital of Sweden?",
-            "What is the second planet in the solar system?"
-        ]})
-        self.set_handler(mock_postgres_handler, name="pg", tables={"df": df})
-
-        self.run_sql(
-            """
-            CREATE MODEL proj.test_anyscale_bulk_qa
-            PREDICT answer
-            USING
-            engine='anyscale_endpoints_engine',
-            model_name = 'mistralai/Mistral-7B-Instruct-v0.1',
-            question_column='question';
-            """
-        )
-        self.wait_predictor("proj", "test_anyscale_bulk_qa")
-
-        result_df = self.run_sql(
-            """
-            SELECT p.answer
-            FROM pg.df as t
-            JOIN proj.test_anyscale_bulk_qa as p;
-            """
-        )
-
-        assert "stockholm" in result_df["answer"].iloc[0].lower()
-        assert "venus" in result_df["answer"].iloc[1].lower()
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])