MindsDB 25.8.3.0__py3-none-any.whl → 25.9.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +3 -45
- mindsdb/api/a2a/__init__.py +52 -0
- mindsdb/api/a2a/agent.py +11 -12
- mindsdb/api/a2a/common/server/server.py +17 -36
- mindsdb/api/a2a/common/server/task_manager.py +14 -28
- mindsdb/api/a2a/task_manager.py +20 -21
- mindsdb/api/a2a/utils.py +1 -1
- mindsdb/api/common/middleware.py +106 -0
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +466 -18
- mindsdb/api/executor/utilities/sql.py +9 -31
- mindsdb/api/http/initialize.py +34 -43
- mindsdb/api/http/namespaces/auth.py +6 -14
- mindsdb/api/http/namespaces/config.py +0 -2
- mindsdb/api/http/namespaces/default.py +74 -106
- mindsdb/api/http/namespaces/file.py +9 -3
- mindsdb/api/http/namespaces/handlers.py +77 -87
- mindsdb/api/http/start.py +29 -47
- mindsdb/api/litellm/start.py +11 -10
- mindsdb/api/mcp/__init__.py +165 -0
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +33 -64
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +86 -85
- mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/crate_handler/crate_handler.py +3 -7
- mindsdb/integrations/handlers/derby_handler/derby_handler.py +32 -34
- mindsdb/integrations/handlers/documentdb_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/dummy_data_handler/dummy_data_handler.py +12 -13
- mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/google_books_handler/google_books_handler.py +45 -44
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +101 -95
- mindsdb/integrations/handlers/google_content_shopping_handler/google_content_shopping_handler.py +129 -129
- mindsdb/integrations/handlers/google_fit_handler/google_fit_handler.py +59 -43
- mindsdb/integrations/handlers/google_search_handler/google_search_handler.py +38 -39
- mindsdb/integrations/handlers/informix_handler/informix_handler.py +5 -18
- mindsdb/integrations/handlers/lightfm_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/maxdb_handler/maxdb_handler.py +22 -28
- mindsdb/integrations/handlers/monetdb_handler/monetdb_handler.py +3 -7
- mindsdb/integrations/handlers/mongodb_handler/mongodb_handler.py +53 -67
- mindsdb/integrations/handlers/mongodb_handler/requirements.txt +1 -0
- mindsdb/{api/mongo/utilities → integrations/handlers/mongodb_handler/utils}/mongodb_ast.py +43 -68
- mindsdb/{api/mongo/utilities → integrations/handlers/mongodb_handler/utils}/mongodb_parser.py +17 -25
- mindsdb/{api/mongo/utilities → integrations/handlers/mongodb_handler/utils}/mongodb_query.py +10 -16
- mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_render.py +43 -69
- mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
- mindsdb/integrations/libs/base.py +1 -1
- mindsdb/integrations/libs/llm/config.py +15 -0
- mindsdb/integrations/libs/llm/utils.py +15 -0
- mindsdb/interfaces/agents/constants.py +1 -0
- mindsdb/interfaces/agents/langchain_agent.py +4 -0
- mindsdb/interfaces/agents/providers.py +20 -0
- mindsdb/interfaces/knowledge_base/controller.py +25 -7
- mindsdb/utilities/config.py +15 -158
- mindsdb/utilities/log.py +0 -25
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- mindsdb/utilities/starters.py +0 -39
- {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/METADATA +269 -267
- {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/RECORD +62 -105
- mindsdb/api/a2a/__main__.py +0 -144
- mindsdb/api/a2a/run_a2a.py +0 -86
- mindsdb/api/common/check_auth.py +0 -42
- mindsdb/api/http/gunicorn_wrapper.py +0 -17
- mindsdb/api/mcp/start.py +0 -205
- mindsdb/api/mongo/__init__.py +0 -0
- mindsdb/api/mongo/classes/__init__.py +0 -5
- mindsdb/api/mongo/classes/query_sql.py +0 -19
- mindsdb/api/mongo/classes/responder.py +0 -45
- mindsdb/api/mongo/classes/responder_collection.py +0 -34
- mindsdb/api/mongo/classes/scram.py +0 -86
- mindsdb/api/mongo/classes/session.py +0 -23
- mindsdb/api/mongo/functions/__init__.py +0 -19
- mindsdb/api/mongo/responders/__init__.py +0 -73
- mindsdb/api/mongo/responders/add_shard.py +0 -13
- mindsdb/api/mongo/responders/aggregate.py +0 -90
- mindsdb/api/mongo/responders/buildinfo.py +0 -17
- mindsdb/api/mongo/responders/coll_stats.py +0 -63
- mindsdb/api/mongo/responders/company_id.py +0 -25
- mindsdb/api/mongo/responders/connection_status.py +0 -22
- mindsdb/api/mongo/responders/count.py +0 -21
- mindsdb/api/mongo/responders/db_stats.py +0 -32
- mindsdb/api/mongo/responders/delete.py +0 -105
- mindsdb/api/mongo/responders/describe.py +0 -23
- mindsdb/api/mongo/responders/end_sessions.py +0 -13
- mindsdb/api/mongo/responders/find.py +0 -175
- mindsdb/api/mongo/responders/get_cmd_line_opts.py +0 -18
- mindsdb/api/mongo/responders/get_free_monitoring_status.py +0 -14
- mindsdb/api/mongo/responders/get_parameter.py +0 -23
- mindsdb/api/mongo/responders/getlog.py +0 -14
- mindsdb/api/mongo/responders/host_info.py +0 -28
- mindsdb/api/mongo/responders/insert.py +0 -270
- mindsdb/api/mongo/responders/is_master.py +0 -20
- mindsdb/api/mongo/responders/is_master_lower.py +0 -13
- mindsdb/api/mongo/responders/list_collections.py +0 -55
- mindsdb/api/mongo/responders/list_databases.py +0 -37
- mindsdb/api/mongo/responders/list_indexes.py +0 -22
- mindsdb/api/mongo/responders/ping.py +0 -13
- mindsdb/api/mongo/responders/recv_chunk_start.py +0 -13
- mindsdb/api/mongo/responders/replsetgetstatus.py +0 -13
- mindsdb/api/mongo/responders/sasl_continue.py +0 -34
- mindsdb/api/mongo/responders/sasl_start.py +0 -33
- mindsdb/api/mongo/responders/update_range_deletions.py +0 -12
- mindsdb/api/mongo/responders/whatsmyuri.py +0 -18
- mindsdb/api/mongo/server.py +0 -388
- mindsdb/api/mongo/start.py +0 -15
- mindsdb/api/mongo/utilities/__init__.py +0 -0
- {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/WHEEL +0 -0
- {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.8.3.0.dist-info → mindsdb-25.9.1.1.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/mongodb_handler/utils/mongodb_render.py

@@ -5,7 +5,7 @@ from bson.objectid import ObjectId
 from mindsdb_sql_parser.ast import Select, Update, Identifier, Star, Constant, Tuple, BinaryOperation, Latest, TypeCast
 from mindsdb_sql_parser.ast.base import ASTNode
 
-from mindsdb.
+from mindsdb.integrations.handlers.mongodb_handler.utils.mongodb_query import MongoQuery
 
 
 class MongodbRender:
@@ -27,7 +27,7 @@ class MongodbRender:
             return self.select(node)
         elif isinstance(node, Update):
             return self.update(node)
-        raise NotImplementedError(f
+        raise NotImplementedError(f"Unknown statement: {node.__class__.__name__}")
 
     def update(self, node: Update) -> MongoQuery:
         """
@@ -43,17 +43,8 @@ class MongodbRender:
         mquery = MongoQuery(collection)
 
         filters = self.handle_where(node.where)
-        row = {
-
-            for k, v in node.update_columns.items()
-        }
-        mquery.add_step({
-            'method': 'update_many',
-            'args': [
-                filters,
-                {"$set": row}
-            ]
-        })
+        row = {k: v.value for k, v in node.update_columns.items()}
+        mquery.add_step({"method": "update_many", "args": [filters, {"$set": row}]})
         return mquery
 
     def select(self, node: Select):
@@ -67,7 +58,7 @@ class MongodbRender:
             MongoQuery: The converted MongoQuery instance.
         """
         if not isinstance(node.from_table, Identifier):
-            raise NotImplementedError(f
+            raise NotImplementedError(f"Not supported from {node.from_table}")
 
         collection = node.from_table.parts[-1]
 
@@ -77,10 +68,10 @@ class MongodbRender:
         filters = self.handle_where(node.where)
 
         group = {}
-        project = {
+        project = {"_id": 0}  # Hide _id field when it has not been explicitly requested.
         if node.distinct:
             # Group by distinct fields.
-            group = {
+            group = {"_id": {}}
 
         if node.targets is not None:
             for col in node.targets:
@@ -95,12 +86,12 @@ class MongodbRender:
                     else:
                         alias = col.alias.parts[-1]
 
-                    project[alias] = f
+                    project[alias] = f"${name}"  # Project field.
 
                     # Group by distinct fields.
                     if node.distinct:
-                        group[
-                        group[name] = {
+                        group["_id"][name] = f"${name}"  # Group field.
+                        group[name] = {"$first": f"${name}"}  # Show field.
 
                 elif isinstance(col, Constant):
                     val = str(col.value)  # Convert to string becuase it is interpreted as an index.
@@ -112,19 +103,19 @@ class MongodbRender:
 
         if node.group_by is not None:
             # TODO
-            raise NotImplementedError(f
+            raise NotImplementedError(f"Group {node.group_by}")
 
         sort = {}
         if node.order_by is not None:
             for col in node.order_by:
                 name = col.field.parts[-1]
-                direction = 1 if col.direction.upper() ==
+                direction = 1 if col.direction.upper() == "ASC" else -1
                 sort[name] = direction
 
         # Compose the MongoDB query.
         mquery = MongoQuery(collection)
 
-        method =
+        method = "aggregate"
        arg = []
 
         # MongoDB related pipeline steps for the aggregate method.
@@ -150,10 +141,7 @@ class MongodbRender:
         if node.limit is not None:
             arg.append({"$limit": int(node.limit.value)})
 
-        mquery.add_step({
-            'method': method,
-            'args': [arg]
-        })
+        mquery.add_step({"method": method, "args": [arg]})
 
         return mquery
 
@@ -168,34 +156,34 @@ class MongodbRender:
            dict: The converted MongoDB query filters.
        """
        # TODO: UnaryOperation, function.
-        if
-            raise NotImplementedError(f
+        if type(node) not in [BinaryOperation]:
+            raise NotImplementedError(f"Not supported type {type(node)}")
 
        op = node.op.lower()
        arg1, arg2 = node.args
 
-        if op in (
+        if op in ("and", "or"):
            query1 = self.handle_where(arg1)
            query2 = self.handle_where(arg2)
 
            ops = {
-
-
+                "and": "$and",
+                "or": "$or",
            }
            query = {ops[op]: [query1, query2]}
            return query
 
        ops_map = {
-
-
-
-
-
-
-
-
-
-
+            ">=": "$gte",
+            ">": "$gt",
+            "<": "$lt",
+            "<=": "$lte",
+            "<>": "$ne",
+            "!=": "$ne",
+            "=": "$eq",
+            "==": "$eq",
+            "is": "$eq",
+            "is not": "$ne",
        }
 
        if isinstance(arg1, Identifier):
@@ -203,35 +191,29 @@ class MongodbRender:
            # Simple operation.
            if isinstance(arg2, Constant):
                # Identifier and Constant.
-                val = ObjectId(arg2.value) if var_name ==
-                if op in (
+                val = ObjectId(arg2.value) if var_name == "_id" else arg2.value
+                if op in ("=", "=="):
                    pass
                elif op in ops_map:
                    op2 = ops_map[op]
                    val = {op2: val}
                else:
-                    raise NotImplementedError(f
+                    raise NotImplementedError(f"Not supported operator {op}")
 
                return {var_name: val}
 
            # IN condition.
            elif isinstance(arg2, Tuple):
                # Should be IN, NOT IN.
-                ops = {
-                    'in': '$in',
-                    'not in': '$nin'
-                }
+                ops = {"in": "$in", "not in": "$nin"}
                # Must be list of Constants.
-                values = [
-                    i.value
-                    for i in arg2.items
-                ]
+                values = [i.value for i in arg2.items]
 
                if op in ops:
                    op2 = ops[op]
                    cond = {op2: values}
                else:
-                    raise NotImplementedError(f
+                    raise NotImplementedError(f"Not supported operator {op}")
 
                return {var_name: cond}
 
@@ -242,13 +224,9 @@ class MongodbRender:
        if op in ops_map:
            op2 = ops_map[op]
        else:
-            raise NotImplementedError(f
+            raise NotImplementedError(f"Not supported operator {op}")
 
-        return {
-            '$expr': {
-                op2: [val1, val2]
-            }
-        }
+        return {"$expr": {op2: [val1, val2]}}
 
    def where_element_convert(self, node: Union[Identifier, Latest, Constant, TypeCast]) -> Any:
        """
@@ -265,22 +243,18 @@ class MongodbRender:
            RuntimeError: If the date format is not supported.
        """
        if isinstance(node, Identifier):
-            return f
+            return f"${node.parts[-1]}"
        elif isinstance(node, Latest):
-            return
+            return "LATEST"
        elif isinstance(node, Constant):
            return node.value
-        elif isinstance(node, TypeCast)
-
-            formats = [
-                "%Y-%m-%d",
-                "%Y-%m-%dT%H:%M:%S.%f"
-            ]
+        elif isinstance(node, TypeCast) and node.type_name.upper() in ("DATE", "DATETIME"):
+            formats = ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S.%f"]
            for format in formats:
                try:
                    return dt.datetime.strptime(node.arg.value, format)
                except ValueError:
                    pass
-            raise RuntimeError(f
+            raise RuntimeError(f"Not supported date format. Supported: {formats}")
        else:
-            raise NotImplementedError(f
+            raise NotImplementedError(f"Unknown where element {node}")
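
The hunks above are largely a quote-style and formatting cleanup of the SQL-to-MongoDB renderer; the operator mapping itself is unchanged. As a quick illustration of what that ops_map does, here is a small standalone sketch; the function name and structure are illustrative, not MindsDB's MongodbRender API:

    # Illustrative sketch of the comparison-to-filter mapping shown in the diff.
    OPS_MAP = {
        ">=": "$gte", ">": "$gt", "<": "$lt", "<=": "$lte",
        "<>": "$ne", "!=": "$ne", "=": "$eq", "==": "$eq",
        "is": "$eq", "is not": "$ne",
    }

    def sql_comparison_to_mongo_filter(column: str, op: str, value) -> dict:
        """Translate `column <op> value` into a MongoDB filter document."""
        op = op.lower()
        if op in ("=", "=="):
            return {column: value}  # Equality can use the short form directly.
        if op not in OPS_MAP:
            raise NotImplementedError(f"Not supported operator {op}")
        return {column: {OPS_MAP[op]: value}}

    print(sql_comparison_to_mongo_filter("price", ">=", 10))  # {'price': {'$gte': 10}}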
mindsdb/integrations/handlers/tpot_handler/requirements.txt

@@ -1,2 +1,2 @@
 tpot<=0.11.7
-type_infer==0.0.
+type_infer==0.0.25
mindsdb/integrations/libs/llm/config.py

@@ -113,3 +113,18 @@ class WriterConfig(BaseLLMConfig):
     writer_api_key: Optional[str] = Field(default=None)
     writer_org_id: Optional[str] = Field(default=None)
     base_url: Optional[str] = Field(default=None)
+
+
+# https://api.python.langchain.com/en/latest/llms/langchain_aws.llms.bedrock.BedrockLLM.html#langchain_aws.llms.bedrock.BedrockLLM
+class BedrockConfig(BaseLLMConfig):
+    model_id: str
+    aws_access_key_id: Optional[str] = Field(default=None)
+    aws_secret_access_key: Optional[str] = Field(default=None)
+    aws_session_token: Optional[str] = Field(default=None)
+    region_name: Optional[str] = Field(default=None)
+    credentials_profile_name: Optional[str] = Field(default=None)
+    endpoint_url: Optional[str] = Field(default=None)
+    stop: Optional[List[str]] = Field(default=None)
+    temperature: Optional[float] = Field(default=0.7)
+    max_tokens: Optional[int] = Field(default=None)
+    model_kwargs: Optional[Dict[str, Any]] = Field(default=None)
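
BedrockConfig is a plain settings object: only model_id is required, and unset credential fields fall back to the default AWS credential chain. A hedged usage sketch; the model ID and region below are placeholder values, not defaults shipped by MindsDB:

    from mindsdb.integrations.libs.llm.config import BedrockConfig  # class added in this release

    cfg = BedrockConfig(
        model_id="anthropic.claude-3-haiku-20240307-v1:0",  # placeholder Bedrock model ID
        region_name="us-east-1",                            # placeholder region
        temperature=0.2,
        max_tokens=1024,
    )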
mindsdb/integrations/libs/llm/utils.py

@@ -16,6 +16,7 @@ from mindsdb.integrations.libs.llm.config import (
     NvidiaNIMConfig,
     MindsdbConfig,
     WriterConfig,
+    BedrockConfig,
 )
 from mindsdb.utilities.config import config
 from langchain_text_splitters import Language, RecursiveCharacterTextSplitter
@@ -222,6 +223,20 @@ def get_llm_config(provider: str, args: Dict) -> BaseLLMConfig:
             writer_org_id=args.get("writer_org_id", None),
             base_url=args.get("base_url", None),
         )
+    if provider == "bedrock":
+        return BedrockConfig(
+            model_id=args.get("model_name"),
+            temperature=temperature,
+            max_tokens=args.get("max_tokens", None),
+            stop=args.get("stop", None),
+            base_url=args.get("endpoint_url", None),
+            aws_access_key_id=args.get("aws_access_key_id", None),
+            aws_secret_access_key=args.get("aws_secret_access_key", None),
+            aws_session_token=args.get("aws_session_token", None),
+            region_name=args.get("aws_region_name", None),
+            credentials_profile_name=args.get("credentials_profile_name", None),
+            model_kwargs=args.get("model_kwargs", None),
+        )
 
     raise ValueError(f"Provider {provider} is not supported.")
 
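
The new branch maps the handler's generic args onto BedrockConfig fields (model_name to model_id, aws_region_name to region_name, endpoint_url to base_url). A hedged sketch of the expected args shape; the values are placeholders, and other keys (for example temperature) may also be read elsewhere in the function:

    from mindsdb.integrations.libs.llm.utils import get_llm_config

    # Placeholder values; keys mirror what the "bedrock" branch reads from `args`.
    args = {
        "model_name": "anthropic.claude-3-haiku-20240307-v1:0",  # mapped to model_id
        "aws_region_name": "us-east-1",                          # mapped to region_name
        "max_tokens": 512,
    }
    llm_config = get_llm_config("bedrock", args)  # expected to return a BedrockConfig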
mindsdb/interfaces/agents/constants.py

@@ -208,6 +208,7 @@ DEFAULT_TEMPERATURE = 0.0
 USER_COLUMN = "question"
 DEFAULT_EMBEDDINGS_MODEL_PROVIDER = "openai"
 DEFAULT_EMBEDDINGS_MODEL_CLASS = OpenAIEmbeddings
+MAX_INSERT_BATCH_SIZE = 50_000
 DEFAULT_TIKTOKEN_MODEL_NAME = os.getenv("DEFAULT_TIKTOKEN_MODEL_NAME", "gpt-4")
 AGENT_CHUNK_POLLING_INTERVAL_SECONDS = os.getenv("AGENT_CHUNK_POLLING_INTERVAL_SECONDS", 1.0)
 DEFAULT_TEXT2SQL_DATABASE = "mindsdb"
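
MAX_INSERT_BATCH_SIZE caps a single knowledge-base insert at 50,000 rows; the controller change further down raises an error when the limit is exceeded. A hedged sketch of how a caller might pre-chunk a large payload to stay under the cap; insert_rows here is a stand-in callback, not a MindsDB API:

    MAX_INSERT_BATCH_SIZE = 50_000  # mirrors the new constant

    def insert_in_batches(rows, insert_rows, batch_size=MAX_INSERT_BATCH_SIZE):
        """Split `rows` into chunks no larger than batch_size and insert each chunk."""
        for start in range(0, len(rows), batch_size):
            insert_rows(rows[start:start + batch_size])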
mindsdb/interfaces/agents/langchain_agent.py

@@ -40,6 +40,7 @@ from .mindsdb_chat_model import ChatMindsdb
 from .callback_handlers import LogCallbackHandler, ContextCaptureCallback
 from .langfuse_callback_handler import LangfuseCallbackHandler, get_skills
 from .safe_output_parser import SafeOutputParser
+from .providers import get_bedrock_chat_model
 
 from mindsdb.interfaces.agents.constants import (
     OPEN_AI_CHAT_MODELS,
@@ -176,6 +177,9 @@ def create_chat_model(args: Dict):
         return ChatGoogleGenerativeAI(**model_kwargs)
     if args["provider"] == "writer":
         return ChatWriter(**model_kwargs)
+    if args["provider"] == "bedrock":
+        ChatBedrock = get_bedrock_chat_model()
+        return ChatBedrock(**model_kwargs)
     if args["provider"] == "mindsdb":
         return ChatMindsdb(**model_kwargs)
     raise ValueError(f"Unknown provider: {args['provider']}")
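
A hedged calling sketch: the diff only shows that a "bedrock" provider is routed to the lazily resolved ChatBedrock class; the remaining args keys are placeholders and depend on how model_kwargs is assembled earlier in the function:

    from mindsdb.interfaces.agents.langchain_agent import create_chat_model

    args = {
        "provider": "bedrock",
        "model_name": "anthropic.claude-3-haiku-20240307-v1:0",  # placeholder
    }
    chat_model = create_chat_model(args)  # expected to return a patched ChatBedrock instance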
mindsdb/interfaces/agents/providers.py (new file)

@@ -0,0 +1,20 @@
+import importlib.util
+
+
+def get_bedrock_chat_model():
+    try:
+        from langchain_aws.chat_models import ChatBedrock
+    except ModuleNotFoundError:
+        raise RuntimeError("bedrock connector is not installed. Please install it with `pip install langchain-aws`")
+
+    if not importlib.util.find_spec("transformers"):
+        raise RuntimeError(
+            "`transformers` module is required for bedrock to count tokens. Please install it with `pip install transformers`"
+        )
+
+    class ChatBedrockPatched(ChatBedrock):
+        def _prepare_input_and_invoke(self, *args, **kwargs):
+            kwargs.pop("stop_sequences", None)
+            return super()._prepare_input_and_invoke(*args, **kwargs)
+
+    return ChatBedrockPatched
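
get_bedrock_chat_model keeps langchain-aws an optional dependency by importing it only when Bedrock is requested, and the subclass drops stop_sequences before invocation. A hedged usage sketch; the model ID and region are placeholders:

    from mindsdb.interfaces.agents.providers import get_bedrock_chat_model

    ChatBedrock = get_bedrock_chat_model()  # raises RuntimeError if langchain-aws or transformers is missing
    llm = ChatBedrock(
        model_id="anthropic.claude-3-haiku-20240307-v1:0",  # placeholder model ID
        region_name="us-east-1",                            # placeholder region
    )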
mindsdb/interfaces/knowledge_base/controller.py

@@ -29,7 +29,7 @@ from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embeddi
     construct_model_from_args,
 )
 
-from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
+from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS, MAX_INSERT_BATCH_SIZE
 from mindsdb.interfaces.agents.langchain_agent import create_chat_model, get_llm_provider
 from mindsdb.interfaces.database.projects import ProjectController
 from mindsdb.interfaces.variables.variables_controller import variables_controller
@@ -245,22 +245,34 @@ class KnowledgeBaseTable:
         keyword_search_cols_and_values = []
         query_text = None
         relevance_threshold = None
+        relevance_threshold_allowed_operators = [
+            FilterOperator.GREATER_THAN_OR_EQUAL.value,
+            FilterOperator.GREATER_THAN.value,
+        ]
+        gt_filtering = False
         hybrid_search_enabled_flag = False
         query_conditions = db_handler.extract_conditions(query.where)
         hybrid_search_alpha = None  # Default to None, meaning no alpha weighted blending
         if query_conditions is not None:
             for item in query_conditions:
-                if item.column == "relevance" and item.op.value
+                if (item.column == "relevance") and (item.op.value in relevance_threshold_allowed_operators):
                     try:
                         relevance_threshold = float(item.value)
                         # Validate range: must be between 0 and 1
                         if not (0 <= relevance_threshold <= 1):
                             raise ValueError(f"relevance_threshold must be between 0 and 1, got: {relevance_threshold}")
+                        if item.op.value == FilterOperator.GREATER_THAN.value:
+                            gt_filtering = True
                         logger.debug(f"Found relevance_threshold in query: {relevance_threshold}")
                     except (ValueError, TypeError) as e:
                         error_msg = f"Invalid relevance_threshold value: {item.value}. {str(e)}"
                         logger.error(error_msg)
                         raise ValueError(error_msg)
+                elif (item.column == "relevance") and (item.op.value not in relevance_threshold_allowed_operators):
+                    raise ValueError(
+                        f"Invalid operator for relevance: {item.op.value}. Only the following operators are allowed: "
+                        f"{','.join(relevance_threshold_allowed_operators)}."
+                    )
                 elif item.column == "reranking":
                     if item.value is False or (isinstance(item.value, str) and item.value.lower() == "false"):
                         disable_reranking = True
@@ -279,10 +291,6 @@ class KnowledgeBaseTable:
                         if not (0 <= item.value <= 1):
                             raise ValueError(f"Invalid hybrid_search_alpha value: {item.value}. Must be between 0 and 1.")
                         hybrid_search_alpha = item.value
-                elif item.column == "relevance" and item.op.value != FilterOperator.GREATER_THAN_OR_EQUAL.value:
-                    raise ValueError(
-                        f"Invalid operator for relevance: {item.op.value}. Only GREATER_THAN_OR_EQUAL is allowed."
-                    )
                 elif item.column == TableField.CONTENT.value:
                     query_text = item.value
 
@@ -368,6 +376,11 @@ class KnowledgeBaseTable:
         # Check if we have a rerank_model configured in KB params
         df = self.add_relevance(df, query_text, relevance_threshold, disable_reranking)
 
+        # if relevance filtering method is strictly GREATER THAN we filter the df
+        if gt_filtering:
+            relevance_scores = TableField.RELEVANCE.value
+            df = df[relevance_scores > relevance_threshold]
+
         return df
 
     def _get_allowed_metadata_columns(self) -> List[str] | None:
@@ -410,7 +423,7 @@ class KnowledgeBaseTable:
 
             # Filter by threshold
             scores_array = np.array(scores)
-            df = df[scores_array
+            df = df[scores_array >= reranker.filtering_threshold]
             logger.debug(f"Applied reranking with params: {reranking_model_params}")
 
         elif "distance" in df.columns:
@@ -493,6 +506,8 @@ class KnowledgeBaseTable:
         """Process and insert raw data rows"""
         if not rows:
             return
+        if len(rows) > MAX_INSERT_BATCH_SIZE:
+            raise ValueError("Input data is too large, please load data in batches")
 
         df = pd.DataFrame(rows)
 
@@ -1078,6 +1093,7 @@ class KnowledgeBaseController:
             raise EntityExistsError("Knowledge base already exists", name)
 
         embedding_params = get_model_params(params.get("embedding_model", {}), "default_embedding_model")
+        params["embedding_model"] = embedding_params
 
         # if model_name is None: # Legacy
         model_name = self._create_embedding_model(
@@ -1104,6 +1120,7 @@ class KnowledgeBaseController:
             params["reranking_model"] = {}
 
         reranking_model_params = get_model_params(reranking_model_params, "default_reranking_model")
+        params["reranking_model"] = reranking_model_params
         if reranking_model_params:
             # Get reranking model from params.
             # This is called here to check validaity of the parameters.
@@ -1228,6 +1245,7 @@ class KnowledgeBaseController:
             raise RuntimeError(f"Problem with embedding model config: {e}")
             return
 
+        params = copy.deepcopy(params)
         if "provider" in params:
             engine = params.pop("provider").lower()
 
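
The relevance handling now accepts both greater-than and greater-than-or-equal comparisons on the relevance column; a strict greater-than sets gt_filtering and trims the result DataFrame after scores are attached. A minimal pandas sketch of such a strict post-filter, assuming the result frame exposes a relevance column (data and column name here are illustrative):

    import pandas as pd

    df = pd.DataFrame({"id": [1, 2, 3], "relevance": [0.65, 0.80, 0.92]})
    relevance_threshold = 0.8

    # Strict greater-than filter, analogous in intent to the gt_filtering branch above.
    df = df[df["relevance"] > relevance_threshold]
    print(df)  # keeps only the row with relevance 0.92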