MindsDB 25.2.2.2__py3-none-any.whl → 25.2.4.0__py3-none-any.whl

This diff shows the changes between these publicly released package versions, as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.

Files changed (54)
  1. {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/METADATA +209 -228
  2. {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/RECORD +52 -50
  3. mindsdb/__about__.py +1 -1
  4. mindsdb/__main__.py +1 -11
  5. mindsdb/api/executor/datahub/datanodes/system_tables.py +4 -1
  6. mindsdb/api/http/initialize.py +8 -5
  7. mindsdb/api/http/namespaces/agents.py +0 -7
  8. mindsdb/api/http/namespaces/config.py +0 -48
  9. mindsdb/api/http/namespaces/databases.py +69 -1
  10. mindsdb/api/http/namespaces/knowledge_bases.py +1 -1
  11. mindsdb/api/http/namespaces/util.py +0 -28
  12. mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -1
  13. mindsdb/integrations/handlers/dspy_handler/requirements.txt +0 -1
  14. mindsdb/integrations/handlers/file_handler/file_handler.py +28 -46
  15. mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +8 -11
  16. mindsdb/integrations/handlers/langchain_embedding_handler/requirements.txt +0 -1
  17. mindsdb/integrations/handlers/langchain_handler/requirements.txt +0 -1
  18. mindsdb/integrations/handlers/llama_index_handler/requirements.txt +0 -1
  19. mindsdb/integrations/handlers/ms_one_drive_handler/ms_one_drive_tables.py +1 -1
  20. mindsdb/integrations/handlers/openai_handler/constants.py +3 -1
  21. mindsdb/integrations/handlers/openai_handler/requirements.txt +0 -1
  22. mindsdb/integrations/handlers/rag_handler/requirements.txt +0 -1
  23. mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +33 -8
  24. mindsdb/integrations/handlers/timegpt_handler/requirements.txt +1 -1
  25. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +3 -2
  26. mindsdb/integrations/handlers/web_handler/web_handler.py +42 -33
  27. mindsdb/integrations/handlers/youtube_handler/__init__.py +2 -0
  28. mindsdb/integrations/handlers/youtube_handler/connection_args.py +32 -0
  29. mindsdb/integrations/libs/llm/utils.py +5 -0
  30. mindsdb/integrations/libs/process_cache.py +2 -2
  31. mindsdb/integrations/utilities/files/file_reader.py +66 -14
  32. mindsdb/integrations/utilities/rag/chains/local_context_summarizer_chain.py +227 -0
  33. mindsdb/interfaces/agents/agents_controller.py +3 -3
  34. mindsdb/interfaces/agents/callback_handlers.py +52 -5
  35. mindsdb/interfaces/agents/langchain_agent.py +5 -3
  36. mindsdb/interfaces/database/database.py +1 -1
  37. mindsdb/interfaces/database/integrations.py +1 -1
  38. mindsdb/interfaces/file/file_controller.py +140 -11
  39. mindsdb/interfaces/jobs/scheduler.py +1 -1
  40. mindsdb/interfaces/knowledge_base/preprocessing/constants.py +2 -2
  41. mindsdb/interfaces/skills/skills_controller.py +2 -2
  42. mindsdb/interfaces/skills/sql_agent.py +6 -1
  43. mindsdb/interfaces/storage/db.py +1 -12
  44. mindsdb/migrations/versions/2025-02-09_4943359e354a_file_metadata.py +31 -0
  45. mindsdb/migrations/versions/2025-02-10_6ab9903fc59a_del_log_table.py +33 -0
  46. mindsdb/utilities/config.py +1 -0
  47. mindsdb/utilities/log.py +17 -2
  48. mindsdb/utilities/ml_task_queue/consumer.py +4 -2
  49. mindsdb/utilities/render/sqlalchemy_render.py +15 -5
  50. mindsdb/utilities/log_controller.py +0 -39
  51. mindsdb/utilities/telemetry.py +0 -44
  52. {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/LICENSE +0 -0
  53. {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/WHEEL +0 -0
  54. {MindsDB-25.2.2.2.dist-info → MindsDB-25.2.4.0.dist-info}/top_level.txt +0 -0
@@ -9,12 +9,6 @@ from flask import current_app as ca
 
  from mindsdb.metrics.metrics import api_endpoint_metrics
  from mindsdb.api.http.namespaces.configs.util import ns_conf
- from mindsdb.utilities.telemetry import (
-     enable_telemetry,
-     disable_telemetry,
-     telemetry_file_exists,
-     inject_telemetry_to_static
- )
  from mindsdb.api.http.gui import update_static
  from mindsdb.utilities.fs import clean_unlinked_process_marks
  from mindsdb.api.http.utils import http_error
@@ -98,28 +92,6 @@ class PingNative(Resource):
          return get_active_tasks()
 
 
- @ns_conf.route('/telemetry')
- class Telemetry(Resource):
-     @ns_conf.doc('get_telemetry_status')
-     @api_endpoint_metrics('GET', '/util/telemetry')
-     def get(self):
-         root_storage_path = ca.config_obj['paths']['root']
-         status = "enabled" if telemetry_file_exists(root_storage_path) else "disabled"
-         return {"status": status}
-
-     @ns_conf.doc('set_telemetry')
-     @api_endpoint_metrics('POST', '/util/telemetry')
-     def post(self):
-         data = request.json
-         action = data['action']
-         if str(action).lower() in ["true", "enable", "on"]:
-             enable_telemetry(ca.config_obj['paths']['root'])
-         else:
-             disable_telemetry(ca.config_obj['paths']['root'])
-         inject_telemetry_to_static(ca.config_obj.paths['static'])
-         return '', 200
-
-
  @ns_conf.route('/validate_json_ai')
  class ValidateJsonAI(Resource):
      @api_endpoint_metrics('POST', '/util/validate_json_ai')
@@ -1,3 +1,2 @@
- openai == 1.24.0
  pydantic-settings >= 2.1.0
  -r mindsdb/integrations/handlers/openai_handler/requirements.txt
@@ -1,4 +1,3 @@
- openai<2.0.0,>=1.54.0
  wikipedia==1.4.0
  tiktoken
  anthropic>=0.26.1
@@ -1,11 +1,10 @@
  import os
  import shutil
  import tempfile
- from pathlib import Path
 
  import pandas as pd
  from mindsdb_sql_parser import parse_sql
- from mindsdb_sql_parser.ast import CreateTable, DropTables, Insert, Select
+ from mindsdb_sql_parser.ast import CreateTable, DropTables, Insert, Select, Identifier
  from mindsdb_sql_parser.ast.base import ASTNode
 
  from mindsdb.api.executor.utilities.sql import query_df
@@ -15,8 +14,6 @@ from mindsdb.integrations.libs.response import HandlerResponse as Response
  from mindsdb.integrations.libs.response import HandlerStatusResponse as StatusResponse
  from mindsdb.utilities import log
 
- from mindsdb.integrations.utilities.files.file_reader import FileReader
-
 
  logger = log.getLogger(__name__)
 
@@ -63,6 +60,18 @@ class FileHandler(DatabaseHandler):
      def check_connection(self) -> StatusResponse:
          return StatusResponse(True)
 
+     def _get_table_page_names(self, table: Identifier):
+         table_name_parts = table.parts
+
+         # Check if it's a multi-part name (e.g., `file_name.sheet_name`)
+         if len(table_name_parts) > 1:
+             table_name = table_name_parts[-2]
+             page_name = table_name_parts[-1]  # Get the sheet name
+         else:
+             table_name = table_name_parts[-1]
+             page_name = None
+         return table_name, page_name
+
      def query(self, query: ASTNode) -> Response:
          if type(query) is DropTables:
              for table_identifier in query.tables:
@@ -84,7 +93,7 @@ class FileHandler(DatabaseHandler):
                      )
              return Response(RESPONSE_TYPE.OK)
 
-         if type(query) is CreateTable:
+         if isinstance(query, CreateTable):
              # Check if the table already exists or if the table name contains more than one namespace
              existing_files = self.file_controller.get_files_names()
 
@@ -96,13 +105,13 @@ class FileHandler(DatabaseHandler):
 
              table_name = query.name.parts[-1]
              if table_name in existing_files:
-                 return Response(
-                     RESPONSE_TYPE.ERROR,
-                     error_message=f"Table '{table_name}' already exists",
-                 )
-
-             if query.is_replace:
-                 self.file_controller.delete_file(table_name)
+                 if query.is_replace:
+                     self.file_controller.delete_file(table_name)
+                 else:
+                     return Response(
+                         RESPONSE_TYPE.ERROR,
+                         error_message=f"Table '{table_name}' already exists",
+                     )
 
              temp_dir_path = tempfile.mkdtemp(prefix="mindsdb_file_")
 
@@ -126,31 +135,19 @@ class FileHandler(DatabaseHandler):
 
              return Response(RESPONSE_TYPE.OK)
 
-         elif type(query) is Select:
-             table_name_parts = query.from_table.parts
-             table_name = table_name_parts[-1]
-
-             # Check if it's a multi-part name (e.g., `files.file_name.sheet_name`)
-             if len(table_name_parts) > 1:
-                 table_name = table_name_parts[-2]
-                 sheet_name = table_name_parts[-1]  # Get the sheet name
-             else:
-                 sheet_name = None
-             file_path = self.file_controller.get_file_path(table_name)
+         elif isinstance(query, Select):
+             table_name, page_name = self._get_table_page_names(query.from_table)
 
-             df = self.handle_source(file_path, sheet_name=sheet_name)
+             df = self.file_controller.get_file_data(table_name, page_name)
 
              # Process the SELECT query
              result_df = query_df(df, query)
              return Response(RESPONSE_TYPE.TABLE, data_frame=result_df)
 
-         elif type(query) is Insert:
-             table_name = query.table.parts[-1]
-             file_path = self.file_controller.get_file_path(table_name)
+         elif isinstance(query, Insert):
+             table_name, page_name = self._get_table_page_names(query.table)
 
-             file_reader = FileReader(path=file_path)
-
-             df = file_reader.to_df()
+             df = self.file_controller.get_file_data(table_name, page_name)
 
              # Create a new dataframe with the values from the query
              new_df = pd.DataFrame(query.values, columns=[col.name for col in query.columns])
@@ -158,10 +155,7 @@ class FileHandler(DatabaseHandler):
              # Concatenate the new dataframe with the existing one
              df = pd.concat([df, new_df], ignore_index=True)
 
-             # Write the concatenated data to the file based on its format
-             format = Path(file_path).suffix.strip(".").lower()
-             write_method = getattr(df, f"to_{format}")
-             write_method(file_path, index=False)
+             self.file_controller.set_file_data(table_name, df, page_name=page_name)
 
              return Response(RESPONSE_TYPE.OK)
 
@@ -175,18 +169,6 @@ class FileHandler(DatabaseHandler):
          ast = self.parser(query)
          return self.query(ast)
 
-     @staticmethod
-     def handle_source(file_path, **kwargs):
-         file_reader = FileReader(path=file_path)
-
-         df = file_reader.to_df(**kwargs)
-
-         header = df.columns.values.tolist()
-
-         df.columns = [key.strip() for key in header]
-         df = df.applymap(clean_cell)
-         return df
-
      def get_tables(self) -> Response:
          """
          List all files
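With the hunks above, SELECT and INSERT against the file handler now resolve an optional sheet (page) name from a multi-part table identifier and delegate reads and writes to the FileController. A minimal sketch of the identifier splitting performed by the new _get_table_page_names helper, using made-up file and sheet names:

    from mindsdb_sql_parser import parse_sql

    # Hypothetical names; the splitting below mirrors the handler logic above.
    query = parse_sql("SELECT * FROM my_spreadsheet.Sheet1")
    parts = query.from_table.parts  # ['my_spreadsheet', 'Sheet1']
    table_name, page_name = (parts[-2], parts[-1]) if len(parts) > 1 else (parts[-1], None)
    print(table_name, page_name)  # my_spreadsheet Sheet1
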
@@ -64,6 +64,12 @@ class MockFileController:
      def save_file(self, name, file_path, file_name=None):
          return True
 
+     def get_file_data(self, name, page_name=None):
+         return pandas.DataFrame(test_file_content[1:], columns=test_file_content[0])
+
+     def set_file_data(self, name, df, page_name=None):
+         return True
+
 
  def curr_dir():
      return os.path.dirname(os.path.realpath(__file__))
@@ -296,18 +302,9 @@ def test_handle_source(file_path, expected_columns):
 
      # using different methods to create reader
      for reader in get_reader(file_path):
-         df = reader.to_df()
+         df = reader.get_page_content()
          assert isinstance(df, pandas.DataFrame)
 
-         if reader.get_format() == 'xlsx':
-
-             assert df.columns.tolist() == test_excel_sheet_content[0]
-             assert len(df) == len(test_excel_sheet_content) - 1
-             assert df.values.tolist() == test_excel_sheet_content[1:]
-             sheet_name = test_excel_sheet_content[1][0]
-
-             df = reader.to_df(sheet_name=sheet_name)
-
          assert df.columns.tolist() == expected_columns
 
          # The pdf and txt files have some different content
@@ -336,7 +333,7 @@ def test_tsv():
          assert reader.get_format() == 'csv'
          assert reader.parameters['delimiter'] == '\t'
 
-         df = reader.to_df()
+         df = reader.get_page_content()
          assert len(df.columns) == 2
 
 
@@ -1,2 +1 @@
- openai==1.55.3
  tiktoken
@@ -1,4 +1,3 @@
- openai==1.55.3
  wikipedia==1.4.0
  tiktoken
  anthropic>=0.26.1
@@ -1,5 +1,4 @@
  llama-index==0.10.13
- openai == 1.24.0
  pydantic-settings >= 2.1.0
  llama-index-readers-web
  llama-index-embeddings-openai
@@ -87,4 +87,4 @@ class FileTable(APIResource):
 
          reader = FileReader(file=BytesIO(file_content), name=table_name)
 
-         return reader.to_df()
+         return reader.get_page_content()
@@ -8,7 +8,9 @@ CHAT_MODELS = (
      'gpt-4-32k',
      'gpt-4-1106-preview',
      'gpt-4-0125-preview',
-     'gpt-4o'
+     'gpt-4o',
+     'o3-mini',
+     'o1-mini'
  )
  COMPLETION_MODELS = ('babbage-002', 'davinci-002')
  FINETUNING_MODELS = ('gpt-3.5-turbo', 'babbage-002', 'davinci-002', 'gpt-4')
@@ -1,2 +1 @@
- openai<2.0.0,>=1.54.0
  tiktoken
@@ -1,6 +1,5 @@
  -r mindsdb/integrations/handlers/chromadb_handler/requirements.txt
  faiss-cpu
- openai==1.55.3
  html2text
  writerai~=1.1.0
  sentence-transformers # needed for HuggingFaceEmbeddings from langchain-community
@@ -1,9 +1,11 @@
+ import io
  import json
 
  import requests
  from typing import Dict, Optional
 
  import pandas as pd
+ import pyarrow.parquet as pq
 
  from mindsdb.integrations.libs.base import BaseMLEngine
 
@@ -37,9 +39,17 @@ class RayServeHandler(BaseMLEngine):
          args['target'] = target
          self.model_storage.json_set('args', args)
          try:
-             resp = requests.post(args['train_url'],
-                                  json={'df': df.to_json(orient='records'), 'target': target},
-                                  headers={'content-type': 'application/json; format=pandas-records'})
+             if args.get('is_parquet', False):
+                 buffer = io.BytesIO()
+                 df.to_parquet(buffer)
+                 resp = requests.post(args['train_url'],
+                                      files={"df": ("df", buffer.getvalue(), "application/octet-stream")},
+                                      data={"args": json.dumps(args), "target": target},
+                                      )
+             else:
+                 resp = requests.post(args['train_url'],
+                                      json={'df': df.to_json(orient='records'), 'target': target, 'args': args},
+                                      headers={'content-type': 'application/json; format=pandas-records'})
          except requests.exceptions.InvalidSchema:
              raise Exception("Error: The URL provided for the training endpoint is invalid.")
 
@@ -59,14 +69,29 @@ class RayServeHandler(BaseMLEngine):
          args = {**(self.model_storage.json_get('args')), **args} # merge incoming args
          pred_args = args.get('predict_params', {})
          args = {**args, **pred_args} # merge pred_args
-         resp = requests.post(args['predict_url'],
-                              json={'df': df.to_json(orient='records'), 'pred_args': pred_args},
-                              headers={'content-type': 'application/json; format=pandas-records'})
-
+         if args.get('is_parquet', False):
+             buffer = io.BytesIO()
+             df.attrs['pred_args'] = pred_args
+             df.to_parquet(buffer)
+             resp = requests.post(args['predict_url'],
+                                  files={"df": ("df", buffer.getvalue(), "application/octet-stream")},
+                                  data={"pred_args": json.dumps(pred_args)},
+                                  )
+         else:
+             resp = requests.post(args['predict_url'],
+                                  json={'df': df.to_json(orient='records'), 'pred_args': pred_args},
+                                  headers={'content-type': 'application/json; format=pandas-records'})
          try:
-             response = resp.json()
+             if args.get('is_parquet', False):
+                 buffer = io.BytesIO(resp.content)
+                 table = pq.read_table(buffer)
+                 response = table.to_pandas()
+             else:
+                 response = resp.json()
          except json.JSONDecodeError:
              error = resp.text
+         except Exception:
+             error = 'Could not decode parquet.'
          else:
              if 'prediction' in response:
                  target = args['target']
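When is_parquet is set in the model args, the handler above posts the DataFrame as a parquet multipart upload and expects parquet bytes back from the predict endpoint. A minimal sketch of the server-side round trip this implies, independent of any particular Ray Serve deployment (the function name and the placeholder prediction column are made up):

    import io

    import pyarrow.parquet as pq

    def handle_predict(uploaded_bytes: bytes) -> bytes:
        # Decode the uploaded parquet payload back into a DataFrame.
        df = pq.read_table(io.BytesIO(uploaded_bytes)).to_pandas()
        df['prediction'] = 0  # stand-in for real model output
        # Reply with parquet bytes so the handler can read them with pq.read_table.
        out = io.BytesIO()
        df.to_parquet(out)
        return out.getvalue()
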
@@ -1 +1 @@
- nixtla==0.5.0
+ nixtla==0.6.6
@@ -220,8 +220,6 @@ def get_all_website_links_recursively(url, reviewed_urls, limit=None, crawl_dept
      if limit is not None:
          if len(reviewed_urls) >= limit:
              return reviewed_urls
-     if crawl_depth == current_depth:
-         return reviewed_urls
 
      if not filters:
          matches_filter = True
@@ -241,6 +239,9 @@ def get_all_website_links_recursively(url, reviewed_urls, limit=None, crawl_dept
              "error": str(error_message),
          }
 
+     if crawl_depth is not None and crawl_depth == current_depth:
+         return reviewed_urls
+
      to_rev_url_list = []
 
      # create a list of new urls to review that don't exist in the already reviewed ones
@@ -1,62 +1,71 @@
+ from typing import List
+
  import pandas as pd
  from mindsdb.integrations.libs.response import HandlerStatusResponse
- from mindsdb_sql_parser import ast
- from mindsdb.integrations.libs.api_handler import APIHandler, APITable
- from mindsdb.utilities.config import Config
- from mindsdb.integrations.utilities.sql_utils import extract_comparison_conditions, project_dataframe
+ from mindsdb.utilities.config import config
  from mindsdb.utilities.security import validate_urls
  from .urlcrawl_helpers import get_all_websites
 
+ from mindsdb.integrations.libs.api_handler import APIResource, APIHandler
+ from mindsdb.integrations.utilities.sql_utils import (FilterCondition, FilterOperator)
 
- class CrawlerTable(APITable):
 
-     def __init__(self, handler: APIHandler):
-         super().__init__(handler)
-         self.config = Config()
+ class CrawlerTable(APIResource):
 
-     def select(self, query: ast.Select) -> pd.DataFrame:
+     def list(
+         self,
+         conditions: List[FilterCondition] = None,
+         limit: int = None,
+         **kwargs
+     ) -> pd.DataFrame:
          """
          Selects data from the provided websites
 
-         Args:
-             query (ast.Select): Given SQL SELECT query
-
          Returns:
              dataframe: Dataframe containing the crawled data
 
          Raises:
              NotImplementedError: If the query is not supported
          """
-         conditions = extract_comparison_conditions(query.where)
          urls = []
-         for operator, arg1, arg2 in conditions:
-             if operator == 'or':
-                 raise NotImplementedError('OR is not supported')
-             if arg1 == 'url':
-                 if operator in ['=', 'in']:
-                     urls = [str(arg2)] if isinstance(arg2, str) else arg2
-                 else:
-                     raise NotImplementedError('Invalid URL format. Please provide a single URL like url = "example.com" or'
-                                               'multiple URLs using the format url IN ("url1", "url2", ...)')
+         crawl_depth = None
+         per_url_limit = None
+         for condition in conditions:
+             if condition.column == 'url':
+                 if condition.op == FilterOperator.IN:
+                     urls = condition.value
+                 elif condition.op == FilterOperator.EQUAL:
+                     urls = [condition.value]
+                 condition.applied = True
+             if condition.column == 'crawl_depth' and condition.op == FilterOperator.EQUAL:
+                 crawl_depth = condition.value
+                 condition.applied = True
+             if condition.column == 'per_url_limit' and condition.op == FilterOperator.EQUAL:
+                 per_url_limit = condition.value
+                 condition.applied = True
 
          if len(urls) == 0:
              raise NotImplementedError(
-                 'You must specify what url you want to crawl, for example: SELECT * FROM crawl WHERE url = "someurl"')
+                 'You must specify what url you want to crawl, for example: SELECT * FROM web.crawler WHERE url = "someurl"')
 
-         allowed_urls = self.config.get('web_crawling_allowed_sites', [])
+         allowed_urls = config.get('web_crawling_allowed_sites', [])
          if allowed_urls and not validate_urls(urls, allowed_urls):
              raise ValueError(f"The provided URL is not allowed for web crawling. Please use any of {', '.join(allowed_urls)}.")
 
-         if query.limit is None:
-             raise NotImplementedError('You must specify a LIMIT clause which defines the number of pages to crawl')
-
-         limit = query.limit.value
-
-         result = get_all_websites(urls, limit, html=False)
-         if len(result) > limit:
+         if limit is None and per_url_limit is None and crawl_depth is None:
+             per_url_limit = 1
+         if per_url_limit is not None:
+             # crawl every url separately
+             results = []
+             for url in urls:
+                 results.append(get_all_websites([url], per_url_limit, crawl_depth=crawl_depth))
+             result = pd.concat(results)
+         else:
+             result = get_all_websites(urls, limit, crawl_depth=crawl_depth)
+
+         if limit is not None and len(result) > limit:
              result = result[:limit]
-         # filter targets
-         result = project_dataframe(result, query.targets, self.get_columns())
+
          return result
 
      def get_columns(self):
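For reference, a rough sketch of the FilterCondition objects that a query such as SELECT * FROM web.crawler WHERE url = 'docs.mindsdb.com' AND crawl_depth = 1 LIMIT 10 would hand to the new list() method; the URL, depth and limit are illustrative, and the keyword arguments assume FilterCondition's (column, op, value) constructor:

    from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator

    conditions = [
        FilterCondition(column='url', op=FilterOperator.EQUAL, value='docs.mindsdb.com'),
        FilterCondition(column='crawl_depth', op=FilterOperator.EQUAL, value=1),
    ]
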
@@ -5,6 +5,7 @@ from .__about__ import __version__ as version, __description__ as description
 
  try:
      from .youtube_handler import YoutubeHandler as Handler
+     from .connection_args import connection_args
      import_error = None
  except Exception as e:
      Handler = None
@@ -24,4 +25,5 @@ __all__ = [
      "description",
      "import_error",
      "icon_path",
+     "connection_args",
  ]
@@ -0,0 +1,32 @@
+ from collections import OrderedDict
+
+ from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE
+
+
+ connection_args = OrderedDict(
+     youtube_api_token={
+         'type': ARG_TYPE.STR,
+         'description': 'Youtube API Token',
+         'label': 'Youtube API Token',
+     },
+     credentials_url={
+         'type': ARG_TYPE.STR,
+         'description': 'URL to Service Account Keys',
+         'label': 'URL to Service Account Keys',
+     },
+     credentials_file={
+         'type': ARG_TYPE.STR,
+         'description': 'Location of Service Account Keys',
+         'label': 'Path to Service Account Keys',
+     },
+     credentials={
+         'type': ARG_TYPE.PATH,
+         'description': 'Service Account Keys',
+         'label': 'Upload Service Account Keys',
+     },
+     code={
+         'type': ARG_TYPE.STR,
+         'description': 'Code After Authorisation',
+         'label': 'Code After Authorisation',
+     },
+ )
@@ -115,6 +115,11 @@ def get_llm_config(provider: str, args: Dict) -> BaseLLMConfig:
      """
      temperature = min(1.0, max(0.0, args.get("temperature", 0.0)))
      if provider == "openai":
+
+         if any(x in args.get("model_name", "") for x in ['o1', 'o3']):
+             # for o1 and o3, 'temperature' does not support 0.0; only the default value (1) is supported
+             temperature = 1
+
          return OpenAIConfig(
              model_name=args.get("model_name", DEFAULT_OPENAI_MODEL),
              temperature=temperature,
@@ -186,7 +186,6 @@ class ProcessCache:
          self._keep_alive = {}
          self._stop_event = threading.Event()
          self.cleaner_thread = None
-         self._start_clean()
 
      def __del__(self):
          self._stop_clean()
@@ -200,7 +199,7 @@ class ProcessCache:
          ):
              return
          self._stop_event.clear()
-         self.cleaner_thread = threading.Thread(target=self._clean)
+         self.cleaner_thread = threading.Thread(target=self._clean, name='ProcessCache.clean')
          self.cleaner_thread.daemon = True
          self.cleaner_thread.start()
 
@@ -258,6 +257,7 @@ class ProcessCache:
          Returns:
              Future
          """
+         self._start_clean()
          handler_module_path = payload['handler_meta']['module_path']
          integration_id = payload['handler_meta']['integration_id']
          if task_type in (ML_TASK_TYPE.LEARN, ML_TASK_TYPE.FINETUNE):