MindsDB 25.2.4.0__py3-none-any.whl → 25.3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +16 -1
- mindsdb/api/executor/command_executor.py +1 -1
- mindsdb/api/executor/datahub/datanodes/system_tables.py +6 -1
- mindsdb/api/executor/planner/query_planner.py +6 -2
- mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +21 -24
- mindsdb/api/http/gui.py +5 -4
- mindsdb/api/http/initialize.py +19 -19
- mindsdb/api/mongo/classes/query_sql.py +2 -1
- mindsdb/api/mongo/responders/aggregate.py +2 -2
- mindsdb/api/mongo/responders/coll_stats.py +3 -2
- mindsdb/api/mongo/responders/db_stats.py +2 -1
- mindsdb/api/mongo/responders/insert.py +4 -2
- mindsdb/api/mysql/mysql_proxy/classes/fake_mysql_proxy/fake_mysql_proxy.py +2 -1
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +5 -4
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +2 -4
- mindsdb/integrations/handlers/autosklearn_handler/autosklearn_handler.py +1 -1
- mindsdb/integrations/handlers/gmail_handler/connection_args.py +2 -2
- mindsdb/integrations/handlers/gmail_handler/gmail_handler.py +19 -66
- mindsdb/integrations/handlers/gmail_handler/requirements.txt +0 -1
- mindsdb/integrations/handlers/google_calendar_handler/connection_args.py +15 -0
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_handler.py +31 -41
- mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/jira_handler/__init__.py +1 -0
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +22 -80
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +3 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +2 -1
- mindsdb/integrations/handlers/youtube_handler/youtube_handler.py +2 -38
- mindsdb/integrations/libs/api_handler_generator.py +583 -0
- mindsdb/integrations/libs/llm/utils.py +2 -1
- mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py +29 -38
- mindsdb/integrations/utilities/pydantic_utils.py +208 -0
- mindsdb/integrations/utilities/rag/pipelines/rag.py +11 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +800 -135
- mindsdb/integrations/utilities/rag/settings.py +390 -152
- mindsdb/integrations/utilities/sql_utils.py +2 -1
- mindsdb/interfaces/agents/agents_controller.py +11 -7
- mindsdb/interfaces/agents/mindsdb_chat_model.py +4 -2
- mindsdb/interfaces/chatbot/chatbot_controller.py +9 -8
- mindsdb/interfaces/database/database.py +2 -1
- mindsdb/interfaces/database/projects.py +28 -2
- mindsdb/interfaces/jobs/jobs_controller.py +4 -1
- mindsdb/interfaces/model/model_controller.py +5 -2
- mindsdb/interfaces/skills/retrieval_tool.py +128 -39
- mindsdb/interfaces/skills/skill_tool.py +7 -7
- mindsdb/interfaces/skills/skills_controller.py +8 -4
- mindsdb/interfaces/storage/db.py +14 -0
- mindsdb/interfaces/storage/json.py +59 -0
- mindsdb/interfaces/storage/model_fs.py +85 -3
- mindsdb/interfaces/triggers/triggers_controller.py +2 -1
- mindsdb/migrations/versions/2022-10-14_43c52d23845a_projects.py +17 -3
- mindsdb/migrations/versions/2025-02-14_4521dafe89ab_added_encrypted_content_to_json_storage.py +29 -0
- mindsdb/migrations/versions/2025-02-19_11347c213b36_added_metadata_to_projects.py +41 -0
- mindsdb/utilities/config.py +6 -2
- mindsdb/utilities/functions.py +11 -0
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/METADATA +219 -222
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/RECORD +61 -60
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/WHEEL +1 -1
- mindsdb/integrations/handlers/gmail_handler/utils.py +0 -45
- mindsdb/integrations/handlers/jira_handler/jira_table.py +0 -172
- mindsdb/integrations/handlers/jira_handler/requirements.txt +0 -1
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.2.4.0.dist-info → mindsdb-25.3.2.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/utilities/handlers/auth_utilities/google/google_user_oauth_utilities.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import os
|
|
2
1
|
import json
|
|
2
|
+
from pathlib import Path
|
|
3
3
|
import requests
|
|
4
4
|
import datetime as dt
|
|
5
5
|
from flask import request
|
|
6
|
-
from shutil import copyfile
|
|
7
6
|
|
|
8
7
|
from mindsdb.utilities import log
|
|
9
8
|
|
|
@@ -29,73 +28,65 @@ class GoogleUserOAuth2Manager:
|
|
|
29
28
|
creds = None
|
|
30
29
|
|
|
31
30
|
if self.credentials_file or self.credentials_url:
|
|
32
|
-
|
|
33
|
-
curr_dir = self.handler_storage.folder_get('config')
|
|
31
|
+
oauth_user_info = self.handler_storage.encrypted_json_get('oauth_user_info')
|
|
34
32
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
if os.path.isfile(creds_file):
|
|
39
|
-
creds = Credentials.from_authorized_user_file(creds_file, self.scopes)
|
|
33
|
+
if oauth_user_info:
|
|
34
|
+
creds = Credentials.from_authorized_user_info(oauth_user_info, self.scopes)
|
|
40
35
|
|
|
41
36
|
if not creds or not creds.valid:
|
|
42
37
|
logger.debug("Credentials do not exist or are invalid, attempting to authorize again")
|
|
43
38
|
|
|
44
|
-
|
|
45
|
-
# save to storage
|
|
46
|
-
self.handler_storage.folder_sync('config')
|
|
47
|
-
else:
|
|
48
|
-
raise ValueError('No valid Gmail Credentials filepath or S3 url found.')
|
|
39
|
+
oauth_user_info = self._download_oauth_user_info()
|
|
49
40
|
|
|
50
41
|
if creds and creds.expired and creds.refresh_token:
|
|
51
42
|
creds.refresh(Request())
|
|
52
43
|
logger.debug("Credentials refreshed successfully")
|
|
53
44
|
else:
|
|
54
|
-
creds = self._execute_google_auth_flow(
|
|
45
|
+
creds = self._execute_google_auth_flow(oauth_user_info)
|
|
55
46
|
logger.debug("New credentials obtained")
|
|
56
47
|
|
|
57
|
-
self.
|
|
58
|
-
logger.debug(
|
|
59
|
-
self.handler_storage.folder_sync('config')
|
|
48
|
+
self.handler_storage.encrypted_json_set('oauth_user_info', self._convert_credentials_to_dict(creds))
|
|
49
|
+
logger.debug("Saving credentials to storage")
|
|
60
50
|
|
|
61
51
|
return creds
|
|
62
52
|
|
|
63
|
-
def
|
|
64
|
-
# if credentials_url is set, attempt to download the
|
|
53
|
+
def _download_oauth_user_info(self):
    """Load the OAuth2 client configuration as a dict.

    The configuration is looked up in two places, in this order:

    1. ``self.credentials_url`` - fetched over HTTP and parsed as JSON.
    2. ``self.credentials_file`` - read from the local file system
       (``~`` is expanded) and parsed as JSON.

    A failed download (non-200 status) is logged and the local file is
    tried next.

    Returns:
        The parsed JSON content of the first source that could be read.

    Raises:
        ValueError: If neither source yields a configuration.
    """
    # if credentials_url is set, attempt to download the contents of the files
    # this will be given preference over credentials_file
    if self.credentials_url:
        response = requests.get(self.credentials_url)
        if response.status_code == 200:
            return response.json()
        else:
            # The status code is passed as a lazy logging argument, so the
            # message needs a matching %s placeholder; without it the logging
            # module reports a formatting error instead of the status code.
            logger.error("Failed to get credentials from URL (status code: %s)", response.status_code)

    # if credentials_file is set, attempt to read the contents of the file
    if self.credentials_file:
        path = Path(self.credentials_file).expanduser()
        if path.exists():
            with open(path, 'r') as f:
                return json.load(f)
        else:
            logger.error("Credentials file does not exist")

    raise ValueError('OAuth2 credentials could not be found')
|
|
80
73
|
|
|
81
|
-
def _execute_google_auth_flow(self,
|
|
82
|
-
flow = Flow.
|
|
74
|
+
def _execute_google_auth_flow(self, oauth_user_info: dict):
    """Run the OAuth2 flow built from the given client configuration.

    The redirect URI is derived from the ``ORIGIN`` header of the current
    request. When ``self.code`` is set it is exchanged for a token and the
    resulting credentials are returned; otherwise the caller is told to
    visit the authorization URL.

    Args:
        oauth_user_info: Client configuration passed to
            ``Flow.from_client_config``.

    Returns:
        The credentials held by the flow after the token exchange.

    Raises:
        AuthException: If no authorization code is available yet; carries
            the authorization URL in ``auth_url``.
    """
    flow = Flow.from_client_config(oauth_user_info, scopes=self.scopes)
    flow.redirect_uri = request.headers['ORIGIN'] + '/verify-auth'

    # No code yet: the user has to authorize first.
    if not self.code:
        auth_url = flow.authorization_url()[0]
        raise AuthException(f'Authorisation required. Please follow the url: {auth_url}', auth_url=auth_url)

    flow.fetch_token(code=self.code)
    return flow.credentials
|
|
93
89
|
|
|
94
|
-
def _save_credentials_to_file(self, creds, file_path):
|
|
95
|
-
with open(file_path, 'w') as token:
|
|
96
|
-
data = self._convert_credentials_to_dict(creds)
|
|
97
|
-
token.write(json.dumps(data))
|
|
98
|
-
|
|
99
90
|
def _convert_credentials_to_dict(self, credentials):
|
|
100
91
|
return {
|
|
101
92
|
'token': credentials.token,
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import pprint
|
|
2
|
+
|
|
3
|
+
pydantic_schema_description = """## Understanding Pydantic Schemas for JSON Formatting
|
|
4
|
+
|
|
5
|
+
Pydantic schemas provide a framework for defining the structure and validation rules for JSON output. Below is an overview of key components commonly found in a Pydantic schema:
|
|
6
|
+
|
|
7
|
+
### Key Components
|
|
8
|
+
|
|
9
|
+
Each object in the schema represents a Pydantic model in JSON format. Typical fields in a Pydantic model description include:
|
|
10
|
+
|
|
11
|
+
- **`anyOf`**:
|
|
12
|
+
- A list describing possible values for a Pydantic model field.
|
|
13
|
+
|
|
14
|
+
- **`additionalProperties`**:
|
|
15
|
+
- Describes the keys of a dictionary. Keys are always of type `string` due to this being a JSON Pydantic schema. The corresponding key types supported by Pydantic are:
|
|
16
|
+
- `string`: a text string
|
|
17
|
+
- `integer`: an integer number
|
|
18
|
+
- `number`: a floating-point number
|
|
19
|
+
|
|
20
|
+
- **`items`**:
|
|
21
|
+
- Describes the items contained within an `array` (list).
|
|
22
|
+
|
|
23
|
+
- **`type`**:
|
|
24
|
+
- Specifies the Pydantic type assigned to the field, defining the expected data type. Common types include:
|
|
25
|
+
- `string`: a text string
|
|
26
|
+
- `integer`: an integer number
|
|
27
|
+
- `number`: a floating-point number
|
|
28
|
+
- `array`: a list
|
|
29
|
+
- `object`: a dictionary
|
|
30
|
+
- `null`: the python null value None. Indicates the field is optional.
|
|
31
|
+
|
|
32
|
+
- **`description`**:
|
|
33
|
+
- Provides a textual narrative explaining the purpose and details of the output JSON field.
|
|
34
|
+
|
|
35
|
+
- **`title`**:
|
|
36
|
+
- A Pydantic-generated, human-readable title for the field.
|
|
37
|
+
|
|
38
|
+
- **`default`**:
|
|
39
|
+
- The default value for this field if no value is provided by the user.
|
|
40
|
+
|
|
41
|
+
### Schema
|
|
42
|
+
|
|
43
|
+
Below is the Pydantic schema:
|
|
44
|
+
|
|
45
|
+
{schema}
|
|
46
|
+
|
|
47
|
+
### Examples
|
|
48
|
+
|
|
49
|
+
Below is an example of well-formed output adhering to this schema.
|
|
50
|
+
|
|
51
|
+
- Dummy text strings are represented as "lorem ipsum."
|
|
52
|
+
|
|
53
|
+
{example}
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_dummy_value(field_value):
    """Return a placeholder value for a scalar field of a JSON schema.

    Args:
        field_value: Field description containing a ``"type"`` key.

    Returns:
        A dummy value matching the field's type, or ``None`` when the type
        is ``"null"`` or not recognised.

    Raises:
        KeyError: If ``field_value`` has no ``"type"`` key.
    """
    example_dict = {
        "string": "lorem ipsum",
        # The schema type name is "integer"; "int" is kept so that any caller
        # relying on the previous key keeps working.
        "integer": 3,
        "int": 3,
        "number": 42.0,
        "boolean": True,
        "null": None,
        "object": {"lorem ipsum": "lorem_ipsum"},
    }
    return example_dict.get(field_value["type"])
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_dummy_array(field_value):
    """Return a two-element placeholder list for an array field.

    The element is chosen from the ``"items"`` description:

    * ``"null"`` items produce ``None`` elements,
    * nested arrays are generated recursively,
    * objects with ``additionalProperties`` go through ``get_dummy_dict``,
    * other typed items go through ``get_dummy_value``,
    * ``anyOf`` items go through ``get_any_of``,
    * anything else is treated as a nested model and passed to
      ``example_generator``.

    Args:
        field_value: Field description containing an ``"items"`` key.

    Returns:
        A list holding the same dummy element twice.
    """
    items = field_value["items"]

    if "type" in items:
        item_type = items["type"]
        if item_type == "null":
            # Previously left unassigned, which raised UnboundLocalError.
            array_value = None
        elif item_type == "array":  # is it an array?
            array_value = get_dummy_array(items)
        elif item_type == "object" and "additionalProperties" in items:  # is it a dict?
            array_value = get_dummy_dict(items)
        else:  # it is a regular value!
            array_value = get_dummy_value(items)
    elif "anyOf" in items:
        # The schema keyword is spelled "anyOf"; the old "AnyOf" check never matched.
        array_value = get_any_of(items)  # can be one of many types
    else:  # is it a pydantic class?
        array_value = example_generator(items)

    return [array_value for _ in range(2)]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def get_dummy_dict(field_value):
    """Return a placeholder for a dictionary-typed field.

    Delegates to ``get_dummy_value`` using the field's ``"type"``.
    """
    placeholder = get_dummy_value(field_value)
    return placeholder
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def get_any_of(field_value):
    """Return a dummy value for the first non-null option of an ``anyOf`` field.

    Options are inspected in order. ``"null"`` options are skipped; the first
    remaining option decides the result. ``None`` is returned when every
    option is ``"null"`` or the list is empty.
    """
    for candidate in field_value["anyOf"]:
        if "type" not in candidate:
            # No explicit type: treat it as a nested model description.
            return example_generator(candidate)

        kind = candidate["type"]
        if kind == "null":
            continue
        if kind == "array":
            return get_dummy_array(candidate)
        if kind == "object" and "additionalProperties" in candidate:
            return get_dummy_dict(candidate)
        return get_dummy_value(candidate)

    return None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def example_generator(pydantic_json_schema):
    """Build an example dict from a ``{model_name: {field_name: field_spec}}`` mapping.

    Every field of every model in the mapping is given a dummy value; the
    fields of all models are merged into one flat result dict.
    """
    generated = {}

    for fields in pydantic_json_schema.values():
        for name, spec in fields.items():
            if "type" not in spec:
                if "anyOf" in spec:
                    generated[name] = get_any_of(spec)
                else:
                    # Neither typed nor anyOf: a nested model description.
                    generated[name] = example_generator(spec)
                continue

            kind = spec["type"]
            if kind == "array":
                generated[name] = get_dummy_array(spec)
            elif kind == "object" and "additionalProperties" in spec:
                generated[name] = get_dummy_dict(spec)
            else:
                generated[name] = get_dummy_value(spec)

    return generated
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def search_and_replace_refs(schema, defs, ref_skip=None, n=0):
    """Substitute ``$ref`` entries in a schema with their definitions, in place.

    For every dict value that holds a ``"$ref"``, the last path segment of
    the reference is used as the definition name. The value is replaced by
    ``{name: defs[name]["properties"]}``, or by ``{"type": "null"}`` when
    the name is listed in ``ref_skip``. Other dicts, and dicts found inside
    lists, are searched recursively.

    Args:
        schema: Mapping to scan and modify in place.
        defs: Mapping of definition name to its schema.
        ref_skip: Keys / definition names to leave out. Defaults to nothing.
        n: Current recursion depth (informational only).
    """
    if ref_skip is None:
        ref_skip = {}

    for key, value in schema.items():
        if key in ref_skip:
            continue
        if isinstance(value, dict):
            if "$ref" in value:
                definition_key = value["$ref"].split("/")[-1]
                if definition_key in ref_skip:
                    schema[key] = {"type": "null"}
                else:
                    schema[key] = {definition_key: defs[definition_key]["properties"]}
            else:
                search_and_replace_refs(value, defs, ref_skip, n + 1)
        elif isinstance(value, list):
            for val in value:
                # Lists such as "enum" or "required" hold plain scalars;
                # only dict elements can contain further references.
                if isinstance(val, dict):
                    search_and_replace_refs(val, defs, ref_skip, n + 1)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def remove_extraneous_fields(schema, ref_skip):
    """Strip skipped fields and bookkeeping keys from a schema's properties.

    Works on ``schema["properties"]`` in place: fields named in ``ref_skip``
    are dropped, and ``"title"``, ``"$defs"`` and ``"required"`` are removed
    from each remaining field description.

    Returns:
        The (mutated) ``schema["properties"]`` mapping.
    """
    properties = schema["properties"]

    for skipped in ref_skip.keys():
        properties.pop(skipped, None)

    for field_spec in properties.values():
        for noise_key in ("title", "$defs", "required"):
            if noise_key in field_spec:
                del field_spec[noise_key]

    return properties
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def format_for_prompt(pydantic_object, ref_skip=None):
    """Format a Pydantic object description for prompting an LLM.

    The model's schema is taken from ``pydantic_object.schema()``, its
    ``$ref`` entries are substituted, bookkeeping keys are removed, and the
    result is wrapped under the schema title.

    Args:
        pydantic_object: Object exposing ``schema()`` that returns a mapping
            with ``"properties"`` and ``"title"``.
        ref_skip: Mapping whose keys name fields / definitions to leave out.
            Defaults to nothing.

    Returns:
        A ``(text, reduced_schema)`` tuple: the pretty-printed string and
        the dict it was produced from.
    """
    if ref_skip is None:
        ref_skip = {}

    schema = dict(pydantic_object.schema())

    # "$defs" is absent when the schema has no definitions section; fall back
    # to an empty mapping instead of raising KeyError.
    search_and_replace_refs(
        schema=schema["properties"], defs=schema.get("$defs", {}), ref_skip=ref_skip, n=0
    )

    reduced_schema = remove_extraneous_fields(schema, ref_skip)

    reduced_schema = {schema["title"]: reduced_schema}

    out = pprint.pformat(reduced_schema)

    return out, reduced_schema
|
|
@@ -294,16 +294,23 @@ class LangChainRAGPipeline:
|
|
|
294
294
|
retriever = SQLRetriever(
|
|
295
295
|
fallback_retriever=vector_store_retriever,
|
|
296
296
|
vector_store_handler=knowledge_base_table.get_vector_db(),
|
|
297
|
-
|
|
298
|
-
|
|
297
|
+
min_k=retriever_config.min_k,
|
|
298
|
+
max_filters=retriever_config.max_filters,
|
|
299
|
+
filter_threshold=retriever_config.filter_threshold,
|
|
300
|
+
database_schema=retriever_config.database_schema,
|
|
299
301
|
embeddings_model=embeddings,
|
|
302
|
+
search_kwargs=config.search_kwargs,
|
|
300
303
|
rewrite_prompt_template=retriever_config.rewrite_prompt_template,
|
|
301
|
-
|
|
304
|
+
table_prompt_template=retriever_config.table_prompt_template,
|
|
305
|
+
column_prompt_template=retriever_config.column_prompt_template,
|
|
306
|
+
value_prompt_template=retriever_config.value_prompt_template,
|
|
307
|
+
boolean_system_prompt=retriever_config.boolean_system_prompt,
|
|
308
|
+
generative_system_prompt=retriever_config.generative_system_prompt,
|
|
302
309
|
num_retries=retriever_config.num_retries,
|
|
303
310
|
embeddings_table=knowledge_base_table._kb.vector_database_table,
|
|
304
311
|
source_table=retriever_config.source_table,
|
|
312
|
+
source_id_column=retriever_config.source_id_column,
|
|
305
313
|
distance_function=distance_function,
|
|
306
|
-
search_kwargs=config.search_kwargs,
|
|
307
314
|
llm=sql_llm
|
|
308
315
|
)
|
|
309
316
|
return cls(
|