MindsDB 25.2.4.0__py3-none-any.whl → 25.3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (54)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +15 -0
  3. mindsdb/api/executor/command_executor.py +1 -1
  4. mindsdb/api/executor/datahub/datanodes/system_tables.py +6 -1
  5. mindsdb/api/executor/planner/query_planner.py +6 -2
  6. mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -1
  7. mindsdb/api/mongo/classes/query_sql.py +2 -1
  8. mindsdb/api/mongo/responders/aggregate.py +2 -2
  9. mindsdb/api/mongo/responders/coll_stats.py +3 -2
  10. mindsdb/api/mongo/responders/db_stats.py +2 -1
  11. mindsdb/api/mongo/responders/insert.py +4 -2
  12. mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py +2 -1
  13. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +5 -4
  14. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +2 -4
  15. mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
  16. mindsdb/integrations/handlers/gmail_handler/connection_args.py +2 -2
  17. mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +19 -66
  18. mindsdb/integrations/handlers/gmail_handler/requirements.txt +0 -1
  19. mindsdb/integrations/handlers/google_calendar_handler/connection_args.py +15 -0
  20. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +31 -41
  21. mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +0 -2
  22. mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +2 -38
  23. mindsdb/integrations/libs/llm/utils.py +2 -1
  24. mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py +29 -38
  25. mindsdb/integrations/utilities/pydantic_utils.py +208 -0
  26. mindsdb/integrations/utilities/rag/pipelines/rag.py +11 -4
  27. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +800 -135
  28. mindsdb/integrations/utilities/rag/settings.py +390 -152
  29. mindsdb/integrations/utilities/sql_utils.py +2 -1
  30. mindsdb/interfaces/agents/agents_controller.py +11 -7
  31. mindsdb/interfaces/agents/mindsdb_chat_model.py +4 -2
  32. mindsdb/interfaces/chatbot/chatbot_controller.py +9 -8
  33. mindsdb/interfaces/database/database.py +2 -1
  34. mindsdb/interfaces/database/projects.py +28 -2
  35. mindsdb/interfaces/jobs/jobs_controller.py +4 -1
  36. mindsdb/interfaces/model/model_controller.py +5 -2
  37. mindsdb/interfaces/skills/retrieval_tool.py +128 -39
  38. mindsdb/interfaces/skills/skill_tool.py +7 -7
  39. mindsdb/interfaces/skills/skills_controller.py +8 -4
  40. mindsdb/interfaces/storage/db.py +14 -0
  41. mindsdb/interfaces/storage/json.py +59 -0
  42. mindsdb/interfaces/storage/model_fs.py +85 -3
  43. mindsdb/interfaces/triggers/triggers_controller.py +2 -1
  44. mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py +17 -3
  45. mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py +29 -0
  46. mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py +41 -0
  47. mindsdb/utilities/config.py +5 -1
  48. mindsdb/utilities/functions.py +11 -0
  49. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.1.0.dist-info}/METADATA +221 -223
  50. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.1.0.dist-info}/RECORD +53 -51
  51. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.1.0.dist-info}/WHEEL +1 -1
  52. mindsdb/integrations/handlers/gmail_handler/utils.py +0 -45
  53. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.1.0.dist-info}/LICENSE +0 -0
  54. {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.1.0.dist-info}/top_level.txt +0 -0
@@ -10,9 +10,7 @@ from mindsdb.integrations.libs.response import (
10
10
  from mindsdb.utilities import log
11
11
  from mindsdb_sql_parser import parse_sql
12
12
 
13
- from collections import OrderedDict
14
13
  from mindsdb.utilities.config import Config
15
- from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
16
14
 
17
15
  from googleapiclient.discovery import build
18
16
 
@@ -38,12 +36,10 @@ class YoutubeHandler(APIHandler):
38
36
  name of a handler instance
39
37
  """
40
38
  super().__init__(name)
41
-
42
- connection_data = kwargs.get("connection_data", {})
39
+ self.connection_data = kwargs.get("connection_data", {})
40
+ self.kwargs = kwargs
43
41
 
44
42
  self.parser = parse_sql
45
- self.connection_data = connection_data
46
- self.kwargs = kwargs
47
43
  self.connection = None
48
44
  self.is_connected = False
49
45
 
@@ -104,7 +100,6 @@ class YoutubeHandler(APIHandler):
104
100
  Status confirmation
105
101
  """
106
102
  response = StatusResponse(False)
107
- need_to_close = self.is_connected is False
108
103
 
109
104
  try:
110
105
  self.connect()
@@ -131,34 +126,3 @@ class YoutubeHandler(APIHandler):
131
126
  """
132
127
  ast = parse_sql(query)
133
128
  return self.query(ast)
134
-
135
-
136
- connection_args = OrderedDict(
137
- youtube_access_token={
138
- "type": ARG_TYPE.STR,
139
- "description": "API Key",
140
- "label": "API Key",
141
- },
142
- credentials_url={
143
- 'type': ARG_TYPE.STR,
144
- 'description': 'URL to OAuth2 Credentials',
145
- 'label': 'URL to OAuth2 Credentials',
146
- },
147
- credentials_file={
148
- 'type': ARG_TYPE.STR,
149
- 'description': 'Location of OAuth2 Credentials',
150
- 'label': 'Location of OAuth2 Credentials',
151
- },
152
- credentials={
153
- 'type': ARG_TYPE.PATH,
154
- 'description': 'OAuth2 Credentials',
155
- 'label': 'Upload OAuth2 Credentials',
156
- },
157
- code={
158
- 'type': ARG_TYPE.STR,
159
- 'description': 'Authentication Code',
160
- 'label': 'Authentication Code',
161
- }
162
- )
163
-
164
- connection_args_example = OrderedDict(youtube_api_token="<your-youtube-api-token>")
@@ -16,6 +16,7 @@ from mindsdb.integrations.libs.llm.config import (
16
16
  NvidiaNIMConfig,
17
17
  MindsdbConfig,
18
18
  )
19
+ from mindsdb.utilities.config import config
19
20
  from langchain_text_splitters import Language, RecursiveCharacterTextSplitter
20
21
 
21
22
 
@@ -211,7 +212,7 @@ def get_llm_config(provider: str, args: Dict) -> BaseLLMConfig:
211
212
  if provider == "mindsdb":
212
213
  return MindsdbConfig(
213
214
  model_name=args["model_name"],
214
- project_name=args.get("project_name", "mindsdb"),
215
+ project_name=args.get("project_name", config.get("default_project")),
215
216
  )
216
217
  if provider == "vllm":
217
218
  return OpenAIConfig(
@@ -1,9 +1,8 @@
1
- import os
2
1
  import json
2
+ from pathlib import Path
3
3
  import requests
4
4
  import datetime as dt
5
5
  from flask import request
6
- from shutil import copyfile
7
6
 
8
7
  from mindsdb.utilities import log
9
8
 
@@ -29,73 +28,65 @@ class GoogleUserOAuth2Manager:
29
28
  creds = None
30
29
 
31
30
  if self.credentials_file or self.credentials_url:
32
- # get the current directory and checks tokens & creds
33
- curr_dir = self.handler_storage.folder_get('config')
31
+ oauth_user_info = self.handler_storage.encrypted_json_get('oauth_user_info')
34
32
 
35
- creds_file = os.path.join(curr_dir, 'creds.json')
36
- secret_file = os.path.join(curr_dir, 'secret.json')
37
-
38
- if os.path.isfile(creds_file):
39
- creds = Credentials.from_authorized_user_file(creds_file, self.scopes)
33
+ if oauth_user_info:
34
+ creds = Credentials.from_authorized_user_info(oauth_user_info, self.scopes)
40
35
 
41
36
  if not creds or not creds.valid:
42
37
  logger.debug("Credentials do not exist or are invalid, attempting to authorize again")
43
38
 
44
- if self._download_secret_file(secret_file):
45
- # save to storage
46
- self.handler_storage.folder_sync('config')
47
- else:
48
- raise ValueError('No valid Gmail Credentials filepath or S3 url found.')
39
+ oauth_user_info = self._download_oauth_user_info()
49
40
 
50
41
  if creds and creds.expired and creds.refresh_token:
51
42
  creds.refresh(Request())
52
43
  logger.debug("Credentials refreshed successfully")
53
44
  else:
54
- creds = self._execute_google_auth_flow(secret_file, self.scopes, self.code)
45
+ creds = self._execute_google_auth_flow(oauth_user_info)
55
46
  logger.debug("New credentials obtained")
56
47
 
57
- self._save_credentials_to_file(creds, creds_file)
58
- logger.debug(f"saved session credentials to {creds_file}")
59
- self.handler_storage.folder_sync('config')
48
+ self.handler_storage.encrypted_json_set('oauth_user_info', self._convert_credentials_to_dict(creds))
49
+ logger.debug("Saving credentials to storage")
60
50
 
61
51
  return creds
62
52
 
63
- def _download_secret_file(self, secret_file):
64
- # if credentials_url is set, attempt to download the file
53
+ def _download_oauth_user_info(self):
54
+ # if credentials_url is set, attempt to download the contents of the files
65
55
  # this will be given preference over credentials_file
66
56
  if self.credentials_url:
67
57
  response = requests.get(self.credentials_url)
68
58
  if response.status_code == 200:
69
- with open(secret_file, 'w') as creds:
70
- creds.write(response.text)
71
- return True
59
+ return response.json()
72
60
  else:
73
- logger.error("Failed to get credentials from S3", response.status_code)
61
+ logger.error("Failed to get credentials from URL", response.status_code)
62
+
63
+ # if credentials_file is set, attempt to read the contents of the file
64
+ if self.credentials_file:
65
+ path = Path(self.credentials_file).expanduser()
66
+ if path.exists():
67
+ with open(path, 'r') as f:
68
+ return json.load(f)
69
+ else:
70
+ logger.error("Credentials file does not exist")
74
71
 
75
- # if credentials_file is set, attempt to copy the file
76
- if self.credentials_file and os.path.isfile(self.credentials_file):
77
- copyfile(self.credentials_file, secret_file)
78
- return True
79
- return False
72
+ raise ValueError('OAuth2 credentials could not be found')
80
73
 
81
- def _execute_google_auth_flow(self, secret_file, scopes, code=None):
82
- flow = Flow.from_client_secrets_file(secret_file, scopes)
74
+ def _execute_google_auth_flow(self, oauth_user_info: dict):
75
+ flow = Flow.from_client_config(
76
+ oauth_user_info,
77
+ scopes=self.scopes
78
+ )
83
79
 
84
80
  flow.redirect_uri = request.headers['ORIGIN'] + '/verify-auth'
85
81
 
86
- if code:
87
- flow.fetch_token(code=code)
82
+ if self.code:
83
+ flow.fetch_token(code=self.code)
88
84
  creds = flow.credentials
89
85
  return creds
90
86
  else:
91
87
  auth_url = flow.authorization_url()[0]
92
88
  raise AuthException(f'Authorisation required. Please follow the url: {auth_url}', auth_url=auth_url)
93
89
 
94
- def _save_credentials_to_file(self, creds, file_path):
95
- with open(file_path, 'w') as token:
96
- data = self._convert_credentials_to_dict(creds)
97
- token.write(json.dumps(data))
98
-
99
90
  def _convert_credentials_to_dict(self, credentials):
100
91
  return {
101
92
  'token': credentials.token,
@@ -0,0 +1,208 @@
1
+ import pprint
2
+
3
+ pydantic_schema_description = """## Understanding Pydantic Schemas for JSON Formatting
4
+
5
+ Pydantic schemas provide a framework for defining the structure and validation rules for JSON output. Below is an overview of key components commonly found in a Pydantic schema:
6
+
7
+ ### Key Components
8
+
9
+ Each object in the schema represents a Pydantic model in JSON format. Typical fields in a Pydantic model description include:
10
+
11
+ - **`anyOf`**:
12
+ - A list describing possible values for a Pydantic model field.
13
+
14
+ - **`additionalProperties`**:
15
+ - Describes the keys of a dictionary. Keys are always of type `string` due to this being a JSON Pydantic schema. The corresponding key types supported by Pydantic are:
16
+ - `string`: a text string
17
+ - `integer`: an integer number
18
+ - `number`: a floating-point number
19
+
20
+ - **`items`**:
21
+ - Describes the items contained within an `array` (list).
22
+
23
+ - **`type`**:
24
+ - Specifies the Pydantic type assigned to the field, defining the expected data type. Common types include:
25
+ - `string`: a text string
26
+ - `integer`: an integer number
27
+ - `number`: a floating-point number
28
+ - `array`: a list
29
+ - `object`: a dictionary
30
+ - `null`: the python null value None. Indicates the field is optional.
31
+
32
+ - **`description`**:
33
+ - Provides a textual narrative explaining the purpose and details of the output JSON field.
34
+
35
+ - **`title`**:
36
+ - A Pydantic-generated, human-readable title for the field.
37
+
38
+ - **`default`**:
39
+ - The default value for this field if no value is provided by the user.
40
+
41
+ ### Schema
42
+
43
+ Below is the Pydantic schema:
44
+
45
+ {schema}
46
+
47
+ ### Examples
48
+
49
+ Below is an example of well-formed output adhering to this schema.
50
+
51
+ - Dummy text strings are represented as "lorem ipsum."
52
+
53
+ {example}
54
+ """
55
+
56
+
57
+ def get_dummy_value(field_value):
58
+ """A function to return a dummy value of a Pydantic model field."""
59
+ type_str = field_value["type"]
60
+ example_dict = {
61
+ "string": "lorem ipsum",
62
+ "int": 3,
63
+ "number": 42.0,
64
+ "null": None,
65
+ "object": {"lorem ipsum": "lorem_ipsum"},
66
+ }
67
+
68
+ if type_str in example_dict:
69
+ return example_dict[type_str]
70
+ else:
71
+ return None
72
+
73
+
74
+ def get_dummy_array(field_value):
75
+ """A function to return a dummy array of a Pydantic model field."""
76
+ items = field_value["items"]
77
+
78
+ if "type" in items:
79
+ if items["type"] == "null": # skip if null
80
+ pass
81
+ elif items["type"] == "array": # is it an array?
82
+ array_value = get_dummy_array(items)
83
+ elif (
84
+ items["type"] == "object" and "additionalProperties" in items
85
+ ): # is it a dict?
86
+ array_value = get_dummy_dict(items)
87
+ else: # it is a regular value!
88
+ array_value = get_dummy_value(items)
89
+ return [array_value for _ in range(2)]
90
+
91
+ elif "AnyOf" in field_value["items"]:
92
+ array_value = get_any_of(field_value["items"]) # can be one of many types
93
+ return [array_value for _ in range(2)]
94
+
95
+ else: # is it a pydantic class?
96
+ array_value = example_generator(items)
97
+ return [array_value for _ in range(2)]
98
+
99
+
100
+ def get_dummy_dict(field_value):
101
+ """A function to return a dummy dictionary of a Pydantic model field."""
102
+ return get_dummy_value(field_value)
103
+
104
+
105
+ def get_any_of(field_value):
106
+ """A function to return the first viable pydantic type of an Any() Pydantic model field."""
107
+ for any_of in field_value["anyOf"]:
108
+ if "type" in any_of:
109
+ if any_of["type"] == "null": # skip if null
110
+ continue
111
+ elif any_of["type"] == "array": # is it an array?
112
+ out = get_dummy_array(any_of)
113
+ return out
114
+ elif (
115
+ any_of["type"] == "object" and "additionalProperties" in any_of
116
+ ): # is it a dict?
117
+ out = get_dummy_dict(any_of)
118
+ return out
119
+ else: # it is a regular value!
120
+ out = get_dummy_value(any_of)
121
+ return out
122
+ else: # is it a pydantic class?
123
+ out = example_generator(any_of)
124
+ return out
125
+
126
+
127
+ def example_generator(pydantic_json_schema):
128
+ """dynamically parse a pydantic object and generate an example of it's formatting."""
129
+
130
+ example_dict = {}
131
+ for schema_name, schema in pydantic_json_schema.items():
132
+
133
+ for field_name, field_value in schema.items():
134
+ if "type" in field_value:
135
+
136
+ if field_value["type"] == "array": # is it an array?
137
+ example_dict[field_name] = get_dummy_array(field_value)
138
+
139
+ elif (
140
+ field_value["type"] == "object"
141
+ and "additionalProperties" in field_value
142
+ ): # is it a dict?
143
+ example_dict[field_name] = get_dummy_dict(field_value)
144
+
145
+ else: # it is a regular value!
146
+ example_dict[field_name] = get_dummy_value(field_value)
147
+
148
+ elif "anyOf" in field_value:
149
+ example_dict[field_name] = get_any_of(field_value)
150
+
151
+ else: # it is a pydantic class
152
+ example_dict[field_name] = example_generator(field_value)
153
+ return example_dict
154
+
155
+
156
+ def search_and_replace_refs(schema, defs, ref_skip={}, n=0):
157
+ """Dynamically substitute subclass references in a Pydantic object schema."""
158
+ for key, value in schema.items():
159
+ if key in ref_skip:
160
+ continue
161
+ if type(value) is dict:
162
+ if "$ref" in value:
163
+ definition_key = value["$ref"].split("/")[-1]
164
+ if definition_key in ref_skip:
165
+ schema[key] = {"type": "null"}
166
+ else:
167
+ schema[key] = {definition_key: defs[definition_key]["properties"]}
168
+ else:
169
+ search_and_replace_refs(value, defs, ref_skip, n + 1)
170
+ elif type(value) is list:
171
+ for val in value:
172
+ search_and_replace_refs(val, defs, ref_skip, n + 1)
173
+
174
+
175
+ def remove_extraneous_fields(schema, ref_skip):
176
+ """Remove extraneous fields from object descriptions."""
177
+ reduced_schema = schema["properties"]
178
+
179
+ for ref in ref_skip.keys():
180
+ if ref in reduced_schema:
181
+ del reduced_schema[ref]
182
+
183
+ for key, value in reduced_schema.items():
184
+ if "title" in value:
185
+ del value["title"]
186
+ if "$defs" in value:
187
+ del value["$defs"]
188
+ if "required" in value:
189
+ del value["required"]
190
+
191
+ return reduced_schema
192
+
193
+
194
+ def format_for_prompt(pydantic_object, ref_skip={}):
195
+ """Format a Pydantic object description for prompting an LLM."""
196
+ schema = {k: v for k, v in pydantic_object.schema().items()}
197
+
198
+ search_and_replace_refs(
199
+ schema=schema["properties"], defs=schema["$defs"], ref_skip=ref_skip, n=0
200
+ )
201
+
202
+ reduced_schema = remove_extraneous_fields(schema, ref_skip)
203
+
204
+ reduced_schema = {schema["title"]: reduced_schema}
205
+
206
+ out = pprint.pformat(reduced_schema)
207
+
208
+ return out, reduced_schema
@@ -294,16 +294,23 @@ class LangChainRAGPipeline:
294
294
  retriever = SQLRetriever(
295
295
  fallback_retriever=vector_store_retriever,
296
296
  vector_store_handler=knowledge_base_table.get_vector_db(),
297
- metadata_schemas=retriever_config.metadata_schemas,
298
- examples=retriever_config.examples,
297
+ min_k=retriever_config.min_k,
298
+ max_filters=retriever_config.max_filters,
299
+ filter_threshold=retriever_config.filter_threshold,
300
+ database_schema=retriever_config.database_schema,
299
301
  embeddings_model=embeddings,
302
+ search_kwargs=config.search_kwargs,
300
303
  rewrite_prompt_template=retriever_config.rewrite_prompt_template,
301
- metadata_filters_prompt_template=retriever_config.metadata_filters_prompt_template,
304
+ table_prompt_template=retriever_config.table_prompt_template,
305
+ column_prompt_template=retriever_config.column_prompt_template,
306
+ value_prompt_template=retriever_config.value_prompt_template,
307
+ boolean_system_prompt=retriever_config.boolean_system_prompt,
308
+ generative_system_prompt=retriever_config.generative_system_prompt,
302
309
  num_retries=retriever_config.num_retries,
303
310
  embeddings_table=knowledge_base_table._kb.vector_database_table,
304
311
  source_table=retriever_config.source_table,
312
+ source_id_column=retriever_config.source_id_column,
305
313
  distance_function=distance_function,
306
- search_kwargs=config.search_kwargs,
307
314
  llm=sql_llm
308
315
  )
309
316
  return cls(