MindsDB 25.4.5.0__py3-none-any.whl → 25.5.3.0__py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB has been flagged as potentially problematic.

Files changed (63)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +107 -125
  3. mindsdb/api/executor/command_executor.py +2 -1
  4. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +8 -0
  5. mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -13
  6. mindsdb/api/executor/planner/query_planner.py +4 -1
  7. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +2 -1
  8. mindsdb/api/http/initialize.py +20 -3
  9. mindsdb/api/http/namespaces/analysis.py +14 -1
  10. mindsdb/api/http/namespaces/tree.py +1 -1
  11. mindsdb/api/http/start.py +7 -2
  12. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +4 -8
  13. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -4
  14. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +2 -2
  15. mindsdb/integrations/handlers/bigquery_handler/requirements.txt +1 -0
  16. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -0
  17. mindsdb/integrations/handlers/gmail_handler/requirements.txt +1 -0
  18. mindsdb/integrations/handlers/google_analytics_handler/requirements.txt +2 -1
  19. mindsdb/integrations/handlers/google_books_handler/requirements.txt +1 -1
  20. mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +1 -0
  21. mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt +1 -1
  22. mindsdb/integrations/handlers/google_fit_handler/requirements.txt +2 -0
  23. mindsdb/integrations/handlers/google_search_handler/requirements.txt +1 -1
  24. mindsdb/integrations/handlers/jira_handler/jira_handler.archived.py +75 -0
  25. mindsdb/integrations/handlers/jira_handler/jira_handler.py +113 -38
  26. mindsdb/integrations/handlers/jira_handler/jira_tables.py +229 -0
  27. mindsdb/integrations/handlers/jira_handler/requirements.txt +1 -0
  28. mindsdb/integrations/handlers/lightfm_handler/requirements.txt +1 -0
  29. mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -2
  30. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  31. mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -0
  32. mindsdb/integrations/handlers/ms_one_drive_handler/requirements.txt +2 -0
  33. mindsdb/integrations/handlers/ms_teams_handler/requirements.txt +3 -1
  34. mindsdb/integrations/handlers/openai_handler/openai_handler.py +5 -4
  35. mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
  36. mindsdb/integrations/handlers/vertex_handler/requirements.txt +1 -0
  37. mindsdb/integrations/handlers/youtube_handler/requirements.txt +1 -0
  38. mindsdb/integrations/utilities/files/file_reader.py +5 -2
  39. mindsdb/interfaces/agents/constants.py +14 -2
  40. mindsdb/interfaces/agents/langchain_agent.py +2 -4
  41. mindsdb/interfaces/database/projects.py +1 -7
  42. mindsdb/interfaces/functions/controller.py +11 -14
  43. mindsdb/interfaces/functions/to_markdown.py +9 -124
  44. mindsdb/interfaces/knowledge_base/controller.py +22 -19
  45. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +28 -5
  46. mindsdb/interfaces/knowledge_base/utils.py +10 -15
  47. mindsdb/interfaces/model/model_controller.py +0 -2
  48. mindsdb/interfaces/skills/sql_agent.py +33 -11
  49. mindsdb/migrations/migrate.py +0 -2
  50. mindsdb/utilities/config.py +3 -2
  51. mindsdb/utilities/context.py +1 -1
  52. mindsdb/utilities/functions.py +0 -36
  53. mindsdb/utilities/langfuse.py +19 -10
  54. mindsdb/utilities/otel/__init__.py +9 -193
  55. mindsdb/utilities/otel/metric_handlers/__init__.py +5 -1
  56. mindsdb/utilities/otel/prepare.py +198 -0
  57. mindsdb/utilities/sql.py +83 -0
  58. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.3.0.dist-info}/METADATA +663 -596
  59. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.3.0.dist-info}/RECORD +62 -57
  60. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.3.0.dist-info}/WHEEL +1 -1
  61. mindsdb/api/mysql/mysql_proxy/classes/sql_statement_parser.py +0 -151
  62. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.3.0.dist-info}/licenses/LICENSE +0 -0
  63. {mindsdb-25.4.5.0.dist-info → mindsdb-25.5.3.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/jira_handler/jira_handler.archived.py
@@ -0,0 +1,75 @@
+ from mindsdb.integrations.libs.api_handler import APIHandler
+ from mindsdb.integrations.libs.response import (
+     HandlerStatusResponse as StatusResponse,
+ )
+ from mindsdb.utilities import log
+ from mindsdb.integrations.libs.api_handler_generator import APIResourceGenerator
+
+
+ logger = log.getLogger(__name__)
+
+
+ class JiraHandler(APIHandler):
+
+     def __init__(self, name=None, **kwargs):
+         """
+         Initialize the handler.
+         Args:
+             name (str): name of particular handler instance
+             connection_data (dict): parameters for connecting to the database
+             **kwargs: arbitrary keyword arguments.
+         """
+         super().__init__(name)
+         self.connection_data = kwargs.get("connection_data", {})
+
+         self.connection = None
+         self.is_connected = False
+
+         # todo store parsed data in files
+
+         self.api_resource_generator = APIResourceGenerator(
+             "https://developer.atlassian.com/cloud/jira/platform/swagger-v3.v3.json",
+             self.connection_data,
+             url_base='/rest/api/3/',
+             options={
+                 'offset_param': ['startAt', 'offset'],
+                 'total_column': ['totalEntryCount', 'total'],
+                 'check_connection_table': 'myself'
+             }
+         )
+
+         resource_tables = self.api_resource_generator.generate_api_resources(self)
+
+         for table_name, resource in resource_tables.items():
+             self._register_table(table_name, resource)
+
+     def __del__(self):
+         if self.is_connected is True:
+             self.disconnect()
+
+     def connect(self):
+         """
+         Set up the connection required by the handler.
+         Returns:
+             HandlerStatusResponse
+         """
+         return
+
+     def check_connection(self) -> StatusResponse:
+         """
+         Check connection to the handler.
+         Returns:
+             HandlerStatusResponse
+         """
+
+         response = StatusResponse(False)
+
+         try:
+             self.api_resource_generator.check_connection()
+             response.success = True
+         except Exception as e:
+             logger.error(f"Error connecting to Jira API: {e}!")
+             response.error_message = e
+
+         self.is_connected = response.success
+         return response
mindsdb/integrations/handlers/jira_handler/jira_handler.py
@@ -1,75 +1,150 @@
+ from typing import Any, Dict
+
+ from atlassian import Jira
+ from requests.exceptions import HTTPError
+
+ from mindsdb.integrations.handlers.jira_handler.jira_tables import JiraProjectsTable, JiraIssuesTable, JiraUsersTable, JiraGroupsTable
  from mindsdb.integrations.libs.api_handler import APIHandler
  from mindsdb.integrations.libs.response import (
+     HandlerResponse as Response,
      HandlerStatusResponse as StatusResponse,
+     RESPONSE_TYPE,
  )
  from mindsdb.utilities import log
- from mindsdb.integrations.libs.api_handler_generator import APIResourceGenerator


  logger = log.getLogger(__name__)


  class JiraHandler(APIHandler):
+     """
+     This handler handles the connection and execution of SQL statements on Jira.
+     """

-     def __init__(self, name=None, **kwargs):
+     def __init__(self, name: str, connection_data: Dict, **kwargs: Any) -> None:
          """
-         Initialize the handler.
+         Initializes the handler.
+
          Args:
-             name (str): name of particular handler instance
-             connection_data (dict): parameters for connecting to the database
-             **kwargs: arbitrary keyword arguments.
+             name (Text): The name of the handler instance.
+             connection_data (Dict): The connection data required to connect to the Jira API.
+             kwargs: Arbitrary keyword arguments.
          """
          super().__init__(name)
-         self.connection_data = kwargs.get("connection_data", {})
+         self.connection_data = connection_data
+         self.kwargs = kwargs

          self.connection = None
          self.is_connected = False

-         # todo store parsed data in files
+         self._register_table("projects", JiraProjectsTable(self))
+         self._register_table("issues", JiraIssuesTable(self))
+         self._register_table("groups", JiraGroupsTable(self))
+         self._register_table("users", JiraUsersTable(self))

-         self.api_resource_generator = APIResourceGenerator(
-             "https://developer.atlassian.com/cloud/jira/platform/swagger-v3.v3.json",
-             self.connection_data,
-             url_base='/rest/api/3/',
-             options={
-                 'offset_param': ['startAt', 'offset'],
-                 'total_column': ['totalEntryCount', 'total'],
-                 'check_connection_table': 'myself'
-             }
-         )
-
-         resource_tables = self.api_resource_generator.generate_api_resources(self)
-
-         for table_name, resource in resource_tables.items():
-             self._register_table(table_name, resource)
+     def connect(self) -> Jira:
+         """
+         Establishes a connection to the Jira API.

-     def __del__(self):
-         if self.is_connected is True:
-             self.disconnect()
+         Raises:
+             ValueError: If the required connection parameters are not provided.
+             AuthenticationError: If an authentication error occurs while connecting to the Salesforce API.

-     def connect(self):
-         """
-         Set up the connection required by the handler.
          Returns:
-             HandlerStatusResponse
+             atlassian.jira.Jira: A connection object to the Jira API.
          """
-         return
+         if self.is_connected is True:
+             return self.connection
+
+         is_cloud = self.connection_data.get("cloud", True)
+
+         if is_cloud:
+             # Jira Cloud supports API token authentication.
+             if not all(key in self.connection_data for key in ['username', 'api_token', 'url']):
+                 raise ValueError("Required parameters (username, api_token, url) must be provided.")
+
+             config = {
+                 "username": self.connection_data['username'],
+                 "password": self.connection_data['api_token'],
+                 "url": self.connection_data['url'],
+             }
+         else:
+             # Jira Server supports personal access token authentication or open access.
+             if 'url' not in self.connection_data:
+                 raise ValueError("Required parameter 'url' must be provided.")
+
+             config = {
+                 "url": self.connection_data['url'],
+                 "cloud": False
+             }
+
+             if 'personal_access_token' in self.connection_data:
+                 config['session'] = ({"Authorization": f"Bearer {self.connection_data['personal_access_token']}"})
+
+         try:
+             self.connection = Jira(**config)
+             self.is_connected = True
+             return self.connection
+         except Exception as unknown_error:
+             logger.error(f"Unknown error connecting to Jira, {unknown_error}!")
+             raise

      def check_connection(self) -> StatusResponse:
          """
-         Check connection to the handler.
+         Checks the status of the connection to the Salesforce API.
+
          Returns:
-             HandlerStatusResponse
+             StatusResponse: An object containing the success status and an error message if an error occurs.
          """
-
          response = StatusResponse(False)

          try:
-             self.api_resource_generator.check_connection()
+             connection = self.connect()
+             connection.myself()
              response.success = True
-         except Exception as e:
-             logger.error(f"Error connecting to Jira API: {e}!")
-             response.error_message = e
+         except (HTTPError, ValueError) as known_error:
+             logger.error(f'Connection check to Jira failed, {known_error}!')
+             response.error_message = str(known_error)
+         except Exception as unknown_error:
+             logger.error(f'Connection check to Jira failed due to an unknown error, {unknown_error}!')
+             response.error_message = str(unknown_error)

          self.is_connected = response.success
+
          return response
+
+     def native_query(self, query: str) -> Response:
+         """
+         Executes a native JQL query on Jira and returns the result.
+
+         Args:
+             query (Text): The JQL query to be executed.
+
+         Returns:
+             Response: A response object containing the result of the query or an error message.
+         """
+         connection = self.connect()
+
+         try:
+             results = connection.jql(query)
+             df = JiraIssuesTable(self).normalize(results['issues'])
+             response = Response(
+                 RESPONSE_TYPE.TABLE,
+                 df
+             )
+         except HTTPError as http_error:
+             logger.error(f'Error running query: {query} on Jira, {http_error}!')
+             response = Response(
+                 RESPONSE_TYPE.ERROR,
+                 error_code=0,
+                 error_message=str(http_error)
+             )
+         except Exception as unknown_error:
+             logger.error(f'Error running query: {query} on Jira, {unknown_error}!')
+             response = Response(
+                 RESPONSE_TYPE.ERROR,
+                 error_code=0,
+                 error_message=str(unknown_error)
+             )
+
+         return response
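For orientation, the rewritten handler is configured entirely through connection_data and exposes check_connection() and native_query() on top of atlassian-python-api. The sketch below is not part of the diff; it only illustrates, following the connect() logic shown above, which keys each authentication mode expects (all URLs and credentials are placeholders).

    # Illustrative sketch only: connection_data shapes accepted by JiraHandler.connect()
    # as implemented in the diff above; values are placeholders.
    from mindsdb.integrations.handlers.jira_handler.jira_handler import JiraHandler

    # Jira Cloud (default): username + API token + site URL.
    cloud_connection_data = {
        "url": "https://your-site.atlassian.net",
        "username": "user@example.com",
        "api_token": "<api-token>",
        # "cloud" defaults to True when omitted.
    }

    # Jira Server / Data Center: URL, cloud=False, optional personal access token.
    server_connection_data = {
        "url": "https://jira.example.internal",
        "cloud": False,
        "personal_access_token": "<pat>",
    }

    handler = JiraHandler("jira_datasource", cloud_connection_data)
    # status = handler.check_connection()  # calls connect() and then Jira.myself() (network call)
    # result = handler.native_query('project = "MDB" ORDER BY created DESC')  # raw JQL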
mindsdb/integrations/handlers/jira_handler/jira_tables.py
@@ -0,0 +1,229 @@
+ from typing import List
+
+ from atlassian import Jira
+ import pandas as pd
+
+ from mindsdb.integrations.libs.api_handler import APIResource
+ from mindsdb.integrations.utilities.sql_utils import FilterCondition, SortColumn, FilterOperator
+ from mindsdb.utilities import log
+
+
+ logger = log.getLogger(__name__)
+
+
+ class JiraProjectsTable(APIResource):
+     def list(
+         self,
+         conditions: List[FilterCondition] = None,
+         limit: int = None,
+         sort: List[SortColumn] = None,
+         targets: List[str] = None,
+         **kwargs
+     ) -> pd.DataFrame:
+         client: Jira = self.handler.connect()
+
+         projects = []
+         for condition in conditions:
+             if condition.column in ('id', 'key'):
+                 if condition.op == FilterOperator.EQUAL:
+                     projects = [client.get_project(condition.value)]
+                 elif condition.op == FilterOperator.IN:
+                     projects = [client.get_project(project_id) for project_id in condition.value]
+                 else:
+                     raise ValueError(f"Unsupported operator {condition.op} for column {condition.column}.")
+                 condition.applied = True
+
+         if not projects:
+             projects = client.get_all_projects()
+
+         if projects:
+             projects_df = pd.DataFrame(projects)
+             projects_df = projects_df[self.get_columns()]
+         else:
+             projects_df = pd.DataFrame([], columns=self.get_columns())
+
+         return projects_df
+
+     def get_columns(self) -> List[str]:
+         return [
+             "id",
+             "key",
+             "name",
+             "projectTypeKey",
+             "simplified",
+             "style",
+             "isPrivate",
+             "entityId",
+             "uuid",
+         ]
+
+
+ class JiraIssuesTable(APIResource):
+     def list(
+         self,
+         conditions: List[FilterCondition] = None,
+         limit: int = None,
+         sort: List[SortColumn] = None,
+         targets: List[str] = None,
+         **kwargs
+     ) -> pd.DataFrame:
+         client: Jira = self.handler.connect()
+
+         issues = []
+         for condition in conditions:
+             if condition.column in ('id', 'key'):
+                 if condition.op == FilterOperator.EQUAL:
+                     issues = [client.get_issue(condition.value)]
+                 elif condition.op == FilterOperator.IN:
+                     issues = [client.get_issue(issue_id) for issue_id in condition.value]
+                 else:
+                     raise ValueError(f"Unsupported operator {condition.op} for column {condition.column}.")
+                 condition.applied = True
+
+             elif condition.column in ('project_id', 'project_key', 'project_name'):
+                 if condition.op == FilterOperator.EQUAL:
+                     issues = client.get_all_project_issues(condition.value, limit=limit)
+                 elif condition.op == FilterOperator.IN:
+                     for project_id in condition.value:
+                         issues.extend(client.get_all_project_issues(project_id, limit=limit))
+
+                 condition.applied = True
+
+         if not issues:
+             project_ids = [project['id'] for project in client.get_all_projects()]
+             for project_id in project_ids:
+                 issues.extend(self._get_project_issues_with_limit(client, project_id, limit=limit, current_issues=issues))
+
+         if issues:
+             issues_df = self.normalize(issues)
+         else:
+             issues_df = pd.DataFrame([], columns=self.get_columns())
+
+         return issues_df
+
+     def _get_project_issues_with_limit(self, client: Jira, project_id, limit=None, current_issues=None):
+         """
+         Helper to get issues from a project, respecting the limit.
+         """
+         if current_issues is None:
+             current_issues = []
+         if limit:
+             remaining = limit - len(current_issues)
+             if remaining <= 0:
+                 return []
+             return client.get_all_project_issues(project_id, limit=remaining)
+         else:
+             return client.get_all_project_issues(project_id)
+
+     def normalize(self, issues: dict) -> pd.DataFrame:
+         issues_df = pd.json_normalize(issues)
+         issues_df.rename(
+             columns={
+                 "fields.project.id": "project_id",
+                 "fields.project.key": "project_key",
+                 "fields.project.name": "project_name",
+                 "fields.summary": "summary",
+                 "fields.priority.name": "priority",
+                 "fields.creator.displayName": "creator",
+                 "fields.assignee.displayName": "assignee",
+                 "fields.status.name": "status",
+             },
+             inplace=True
+         )
+         issues_df = issues_df[self.get_columns()]
+
+         return issues_df
+
+     def get_columns(self) -> List[str]:
+         return [
+             "id",
+             "key",
+             "project_id",
+             "project_key",
+             "project_name",
+             "summary",
+             "priority",
+             "creator",
+             "assignee",
+             "status",
+         ]
+
+
+ class JiraGroupsTable(APIResource):
+     def list(
+         self,
+         conditions: List[FilterCondition] = None,
+         limit: int = None,
+         sort: List[SortColumn] = None,
+         targets: List[str] = None,
+         **kwargs
+     ) -> pd.DataFrame:
+         client: Jira = self.handler.connect()
+
+         if limit:
+             groups = client.get_groups(limit=limit)['groups']
+         else:
+             groups = client.get_groups()['groups']
+
+         if groups:
+             groups_df = pd.DataFrame(groups)
+             groups_df = groups_df[self.get_columns()]
+         else:
+             groups_df = pd.DataFrame([], columns=self.get_columns())
+
+         return groups_df
+
+     def get_columns(self) -> List[str]:
+         return [
+             "groupId",
+             "name",
+             "html",
+         ]
+
+
+ class JiraUsersTable(APIResource):
+     def list(
+         self,
+         conditions: List[FilterCondition] = None,
+         limit: int = None,
+         sort: List[SortColumn] = None,
+         targets: List[str] = None,
+         **kwargs
+     ) -> pd.DataFrame:
+         client: Jira = self.handler.connect()
+
+         users = []
+         for condition in conditions:
+             if condition.column == 'accountId':
+                 if condition.op == FilterOperator.EQUAL:
+                     users = [client.user(account_id=condition.value)]
+                 elif condition.op == FilterOperator.IN:
+                     users = [client.user(account_id=accountId) for accountId in condition.value]
+                 else:
+                     raise ValueError(f"Unsupported operator {condition.op} for column {condition.column}.")
+                 condition.applied = True
+
+         if not users:
+             if limit:
+                 users = client.users_get_all(limit=limit)
+             else:
+                 users = client.users_get_all()
+
+         if users:
+             users_df = pd.DataFrame(users)
+             users_df = users_df[self.get_columns()]
+         else:
+             users_df = pd.DataFrame([], columns=self.get_columns())
+
+         return users_df
+
+     def get_columns(self) -> List[str]:
+         return [
+             "accountId",
+             "accountType",
+             "emailAddress",
+             "displayName",
+             "active",
+             "timeZone",
+             "locale",
+         ]
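As a quick illustration of how these APIResource tables are consumed (not part of the diff; the positional FilterCondition(column, operator, value) constructor shape is assumed here from how .column, .op and .value are read above):

    # Illustrative sketch only: how JiraIssuesTable.list() maps a WHERE clause
    # onto Jira client calls. FilterCondition's positional constructor is assumed.
    from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator

    # Roughly equivalent to: SELECT * FROM jira_datasource.issues WHERE project_key = 'MDB' LIMIT 10
    condition = FilterCondition("project_key", FilterOperator.EQUAL, "MDB")

    # issues_table = JiraIssuesTable(handler)  # handler: a JiraHandler as in the earlier sketch
    # df = issues_table.list(conditions=[condition], limit=10)
    # df columns: id, key, project_id, ..., status (see get_columns() above)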
mindsdb/integrations/handlers/jira_handler/requirements.txt
@@ -0,0 +1 @@
+ atlassian-python-api
mindsdb/integrations/handlers/lightfm_handler/requirements.txt
@@ -1 +1,2 @@
  lightfm==1.17
+ dataprep_ml==24.5.1.2
mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py
@@ -21,8 +21,6 @@ from mindsdb.utilities.functions import cast_row_types

  from .functions import run_finetune, run_learn

- IS_PY36 = sys.version_info[1] <= 6
-

  class NumpyJSONEncoder(json.JSONEncoder):
      """
mindsdb/integrations/handlers/lightwood_handler/requirements.txt
@@ -1,4 +1,4 @@
- lightwood>=25.3.3.3
- lightwood[extra]>=25.3.3.3
- lightwood[xai]>=25.3.3.3
- type_infer==0.0.20
+ lightwood>=25.5.2.2
+ lightwood[extra]>=25.5.2.2
+ lightwood[xai]>=25.5.2.2
+ type_infer==0.0.22
mindsdb/integrations/handlers/lindorm_handler/requirements.txt
@@ -1,2 +1,3 @@
  pyphoenix
  phoenixdb
+ protobuf==3.20.3
mindsdb/integrations/handlers/ms_one_drive_handler/requirements.txt
@@ -0,0 +1,2 @@
+ msal
+ -r mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/requirements.txt
mindsdb/integrations/handlers/ms_teams_handler/requirements.txt
@@ -1,2 +1,4 @@
  botframework-connector
- botbuilder-schema
+ botbuilder-schema
+ msal
+ -r mindsdb/integrations/utilities/handlers/auth_utilities/microsoft/requirements.txt
mindsdb/integrations/handlers/openai_handler/openai_handler.py
@@ -9,6 +9,7 @@ import subprocess
  import concurrent.futures
  from typing import Text, Tuple, Dict, List, Optional, Any
  import openai
+ from openai.types.fine_tuning import FineTuningJob
  from openai import OpenAI, AzureOpenAI, NotFoundError, AuthenticationError
  import numpy as np
  import pandas as pd
@@ -1116,7 +1117,7 @@ class OpenAIHandler(BaseMLEngine):
          }
          return {**ft_params, **extra_params}

-     def _ft_call(self, ft_params: Dict, client: OpenAI, hour_budget: int) -> Tuple[openai.types.fine_tuning.FineTuningJob, Text]:
+     def _ft_call(self, ft_params: Dict, client: OpenAI, hour_budget: int) -> Tuple[FineTuningJob, Text]:
          """
          Submit a fine-tuning job via the OpenAI API.
          This method handles requests to both the legacy and new endpoints.
@@ -1134,7 +1135,7 @@ class OpenAIHandler(BaseMLEngine):
              PendingFT: If the fine-tuning process is still pending.

          Returns:
-             Tuple[openai.types.fine_tuning.FineTuningJob, Text]: Fine-tuning stats and result file ID.
+             Tuple[FineTuningJob, Text]: Fine-tuning stats and result file ID.
          """
          ft_result = client.fine_tuning.jobs.create(
              **{k: v for k, v in ft_params.items() if v is not None}
@@ -1143,7 +1144,7 @@ class OpenAIHandler(BaseMLEngine):
          @retry_with_exponential_backoff(
              hour_budget=hour_budget,
          )
-         def _check_ft_status(job_id: Text) -> openai.types.fine_tuning.FineTuningJob:
+         def _check_ft_status(job_id: Text) -> FineTuningJob:
              """
              Check the status of a fine-tuning job via the OpenAI API.

@@ -1154,7 +1155,7 @@ class OpenAIHandler(BaseMLEngine):
                  PendingFT: If the fine-tuning process is still pending.

              Returns:
-                 openai.types.fine_tuning.FineTuningJob: Fine-tuning stats.
+                 FineTuningJob: Fine-tuning stats.
              """
              ft_retrieved = client.fine_tuning.jobs.retrieve(fine_tuning_job_id=job_id)
              if ft_retrieved.status in ('succeeded', 'failed', 'cancelled'):
mindsdb/integrations/handlers/snowflake_handler/requirements.txt
@@ -1,2 +1,2 @@
- snowflake-connector-python[pandas]==3.13.1
+ snowflake-connector-python[pandas]==3.15.0
  snowflake-sqlalchemy==1.7.0
mindsdb/integrations/handlers/vertex_handler/requirements.txt
@@ -1 +1,2 @@
  google-cloud-aiplatform>=1.35.0
+ -r mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt
mindsdb/integrations/handlers/youtube_handler/requirements.txt
@@ -1,2 +1,3 @@
  google-api-python-client
  youtube-transcript-api
+ -r mindsdb/integrations/utilities/handlers/auth_utilities/google/requirements.txt
mindsdb/integrations/utilities/files/file_reader.py
@@ -9,8 +9,6 @@ from typing import List
  import filetype
  import pandas as pd
  from charset_normalizer import from_bytes
- from langchain_text_splitters import RecursiveCharacterTextSplitter
- import fitz  # pymupdf

  from mindsdb.utilities import log

@@ -298,6 +296,8 @@ class FileReader(FormatDetector):

      @staticmethod
      def read_txt(file_obj: BytesIO, name=None, **kwargs):
+         # the lib is heavy, so import it only when needed
+         from langchain_text_splitters import RecursiveCharacterTextSplitter
          file_obj = decode(file_obj)

          try:
@@ -326,6 +326,9 @@ class FileReader(FormatDetector):

      @staticmethod
      def read_pdf(file_obj: BytesIO, name=None, **kwargs):
+         # the libs are heavy, so import it only when needed
+         import fitz  # pymupdf
+         from langchain_text_splitters import RecursiveCharacterTextSplitter

          with fitz.open(stream=file_obj.read()) as pdf:  # open pdf
              text = chr(12).join([page.get_text() for page in pdf])
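The file_reader change above defers the pymupdf and langchain_text_splitters imports into read_txt/read_pdf so that importing the module stays cheap. A generic sketch of the same deferred-import pattern (the function name and usage are illustrative, not from the codebase):

    # Illustrative sketch of the deferred-import pattern applied above.
    def extract_pdf_text(path: str) -> str:
        # Importing inside the function keeps module import time and baseline memory
        # low; the heavy dependency loads only when a PDF is actually read.
        import fitz  # pymupdf

        with fitz.open(path) as pdf:
            return chr(12).join(page.get_text() for page in pdf)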
mindsdb/interfaces/agents/constants.py
@@ -4,8 +4,20 @@ from langchain.agents import AgentType
  from langchain_openai import OpenAIEmbeddings

  from types import MappingProxyType
- from mindsdb.integrations.handlers.openai_handler.constants import (
-     CHAT_MODELS as OPEN_AI_CHAT_MODELS,
+
+ # the same as
+ # from mindsdb.integrations.handlers.openai_handler.constants import CHAT_MODELS
+ OPEN_AI_CHAT_MODELS = (
+     'gpt-3.5-turbo',
+     'gpt-3.5-turbo-16k',
+     'gpt-3.5-turbo-instruct',
+     'gpt-4',
+     'gpt-4-32k',
+     'gpt-4-1106-preview',
+     'gpt-4-0125-preview',
+     'gpt-4o',
+     'o3-mini',
+     'o1-mini'
  )

  SUPPORTED_PROVIDERS = {
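Inlining the tuple removes the agents package's import-time dependency on the OpenAI handler while keeping the same model list. A minimal, hypothetical illustration of how such a constant is typically consulted (the helper below is not from the codebase):

    # Hypothetical helper, not from the diff: membership check against the inlined constant.
    from mindsdb.interfaces.agents.constants import OPEN_AI_CHAT_MODELS

    def uses_openai_chat_model(model_name: str) -> bool:
        return model_name in OPEN_AI_CHAT_MODELS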
mindsdb/interfaces/agents/langchain_agent.py
@@ -24,9 +24,6 @@ from langchain_core.messages.base import BaseMessage
  from langchain_core.prompts import PromptTemplate
  from langchain_core.tools import Tool

- from mindsdb.integrations.handlers.openai_handler.constants import (
-     CHAT_MODELS as OPEN_AI_CHAT_MODELS,
- )
  from mindsdb.integrations.libs.llm.utils import get_llm_config
  from mindsdb.integrations.utilities.handler_utils import get_api_key
  from mindsdb.integrations.utilities.rag.settings import DEFAULT_RAG_PROMPT_TEMPLATE
@@ -42,7 +39,8 @@ from .callback_handlers import LogCallbackHandler, ContextCaptureCallback
  from .langfuse_callback_handler import LangfuseCallbackHandler, get_skills
  from .safe_output_parser import SafeOutputParser

- from .constants import (
+ from mindsdb.interfaces.agents.constants import (
+     OPEN_AI_CHAT_MODELS,
      DEFAULT_AGENT_TIMEOUT_SECONDS,
      DEFAULT_AGENT_TYPE,
      DEFAULT_EMBEDDINGS_MODEL_PROVIDER,