MindsDB 25.7.3.0-py3-none-any.whl → 25.8.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (102)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +11 -1
  3. mindsdb/api/a2a/common/server/server.py +16 -6
  4. mindsdb/api/executor/command_executor.py +215 -150
  5. mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
  6. mindsdb/api/executor/planner/plan_join.py +3 -0
  7. mindsdb/api/executor/planner/plan_join_ts.py +117 -100
  8. mindsdb/api/executor/planner/query_planner.py +1 -0
  9. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
  10. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
  11. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
  12. mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
  13. mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
  14. mindsdb/api/executor/utilities/sql.py +30 -0
  15. mindsdb/api/http/initialize.py +18 -44
  16. mindsdb/api/http/namespaces/agents.py +23 -20
  17. mindsdb/api/http/namespaces/chatbots.py +83 -120
  18. mindsdb/api/http/namespaces/file.py +1 -1
  19. mindsdb/api/http/namespaces/jobs.py +38 -60
  20. mindsdb/api/http/namespaces/tree.py +69 -61
  21. mindsdb/api/http/namespaces/views.py +56 -72
  22. mindsdb/api/mcp/start.py +2 -0
  23. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
  24. mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
  25. mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
  26. mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
  27. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
  28. mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
  29. mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
  30. mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
  31. mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
  32. mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
  33. mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
  34. mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
  35. mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
  36. mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
  37. mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
  38. mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
  39. mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
  40. mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
  41. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
  42. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
  43. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
  44. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  45. mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -77
  46. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  47. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +5 -2
  48. mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
  49. mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
  50. mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
  51. mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
  52. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
  53. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
  54. mindsdb/integrations/handlers/salesforce_handler/constants.py +215 -0
  55. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +141 -80
  56. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +0 -1
  57. mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
  58. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
  59. mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
  60. mindsdb/integrations/libs/llm/config.py +0 -14
  61. mindsdb/integrations/libs/llm/utils.py +0 -15
  62. mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
  63. mindsdb/integrations/utilities/files/file_reader.py +5 -19
  64. mindsdb/integrations/utilities/handler_utils.py +32 -12
  65. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
  66. mindsdb/interfaces/agents/agents_controller.py +246 -149
  67. mindsdb/interfaces/agents/constants.py +0 -1
  68. mindsdb/interfaces/agents/langchain_agent.py +11 -6
  69. mindsdb/interfaces/data_catalog/data_catalog_loader.py +4 -4
  70. mindsdb/interfaces/database/database.py +38 -13
  71. mindsdb/interfaces/database/integrations.py +20 -5
  72. mindsdb/interfaces/database/projects.py +174 -23
  73. mindsdb/interfaces/database/views.py +86 -60
  74. mindsdb/interfaces/jobs/jobs_controller.py +103 -110
  75. mindsdb/interfaces/knowledge_base/controller.py +33 -6
  76. mindsdb/interfaces/knowledge_base/evaluate.py +2 -1
  77. mindsdb/interfaces/knowledge_base/executor.py +24 -0
  78. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
  79. mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
  80. mindsdb/interfaces/query_context/context_controller.py +111 -145
  81. mindsdb/interfaces/skills/skills_controller.py +18 -6
  82. mindsdb/interfaces/storage/db.py +40 -6
  83. mindsdb/interfaces/variables/variables_controller.py +8 -15
  84. mindsdb/utilities/config.py +5 -3
  85. mindsdb/utilities/fs.py +54 -17
  86. mindsdb/utilities/functions.py +72 -60
  87. mindsdb/utilities/log.py +38 -6
  88. mindsdb/utilities/ps.py +7 -7
  89. {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/METADATA +282 -268
  90. {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/RECORD +94 -92
  91. mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
  92. mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
  93. mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
  94. mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
  95. mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
  96. mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
  97. mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
  98. mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
  99. /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
  100. {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/WHEEL +0 -0
  101. {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/licenses/LICENSE +0 -0
  102. {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
-# # NOTE: Any changes made here need to be made to requirements_cpu.txt as well
-# datasets==2.16.1
-# evaluate==0.4.3
-# nltk==3.9.1
-# huggingface-hub==0.29.3
-# torch==2.7.1
-# transformers >= 4.42.4
+# NOTE: Any changes made here need to be made to requirements_cpu.txt as well
+datasets==2.16.1
+evaluate==0.4.3
+nltk==3.9.1
+huggingface-hub==0.29.3
+torch==2.8.0
+transformers >= 4.42.4
@@ -1,7 +1,7 @@
-# # Needs to be installed with `pip install --extra-index-url https://download.pytorch.org/whl/ .[huggingface_cpu]`
-# datasets==2.16.1
-# evaluate==0.4.3
-# nltk==3.9.1
-# huggingface-hub==0.29.3
-# torch==2.7.1+cpu
-# transformers >= 4.42.4
+# Needs to be installed with `pip install --extra-index-url https://download.pytorch.org/whl/ .[huggingface_cpu]`
+datasets==2.16.1
+evaluate==0.4.3
+nltk==3.9.1
+huggingface-hub==0.29.3
+torch==2.8.0+cpu
+transformers >= 4.42.4
@@ -1,27 +1,27 @@
-# from mindsdb.integrations.handlers.huggingface_handler.finetune import (
-#     _finetune_cls,
-#     _finetune_fill_mask,
-#     _finetune_question_answering,
-#     _finetune_summarization,
-#     _finetune_text_generation,
-#     _finetune_translate,
-# )
+from mindsdb.integrations.handlers.huggingface_handler.finetune import (
+    _finetune_cls,
+    _finetune_fill_mask,
+    _finetune_question_answering,
+    _finetune_summarization,
+    _finetune_text_generation,
+    _finetune_translate,
+)

-# # todo once we have moved predict tasks functions into a separate function
-# # PREDICT_MAP = {
-# #     'text-classification': self.predict_text_classification,
-# #     'zero-shot-classification': self.predict_zero_shot,
-# #     'translation': self.predict_translation,
-# #     'summarization': self.predict_summarization,
-# #     'fill-mask': self.predict_fill_mask
-# # }
+# todo once we have moved predict tasks functions into a separate function
+# PREDICT_MAP = {
+#     'text-classification': self.predict_text_classification,
+#     'zero-shot-classification': self.predict_zero_shot,
+#     'translation': self.predict_translation,
+#     'summarization': self.predict_summarization,
+#     'fill-mask': self.predict_fill_mask
+# }

-# FINETUNE_MAP = {
-#     "text-classification": _finetune_cls,
-#     "zero-shot-classification": _finetune_cls,
-#     "translation": _finetune_translate,
-#     "summarization": _finetune_summarization,
-#     "fill-mask": _finetune_fill_mask,
-#     "text-generation": _finetune_text_generation,
-#     "question-answering": _finetune_question_answering,
-# }
+FINETUNE_MAP = {
+    "text-classification": _finetune_cls,
+    "zero-shot-classification": _finetune_cls,
+    "translation": _finetune_translate,
+    "summarization": _finetune_summarization,
+    "fill-mask": _finetune_fill_mask,
+    "text-generation": _finetune_text_generation,
+    "question-answering": _finetune_question_answering,
+}
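
The hunk above re-enables the handler's fine-tune dispatch table by uncommenting the FINETUNE_MAP import block. A minimal sketch of how such a task-to-function map is typically consumed follows; the finetune() helper and the stub routines are hypothetical, standing in for the real _finetune_* implementations:

def _finetune_cls(df, args):
    ...  # stub for the real classification fine-tune routine

def _finetune_summarization(df, args):
    ...  # stub for the real summarization fine-tune routine

FINETUNE_MAP = {
    "text-classification": _finetune_cls,
    "zero-shot-classification": _finetune_cls,
    "summarization": _finetune_summarization,
}

def finetune(task: str, df, args):
    # Look up the model's task and delegate; unknown tasks fail fast.
    if task not in FINETUNE_MAP:
        raise ValueError(f"Fine-tuning is not supported for task {task!r}")
    return FINETUNE_MAP[task](df, args)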
@@ -13,7 +13,10 @@ import pandas as pd

 from mindsdb.interfaces.agents.safe_output_parser import SafeOutputParser
 from mindsdb.interfaces.agents.langchain_agent import (
-    get_llm_provider, get_embedding_model_provider, create_chat_model, get_chat_model_params
+    get_llm_provider,
+    get_embedding_model_provider,
+    create_chat_model,
+    get_chat_model_params,
 )

 from mindsdb.interfaces.agents.constants import (
@@ -24,19 +27,21 @@ from mindsdb.interfaces.agents.constants import (
     DEFAULT_MAX_TOKENS,
     DEFAULT_MODEL_NAME,
     USER_COLUMN,
-    ASSISTANT_COLUMN
+    ASSISTANT_COLUMN,
 )
 from mindsdb.integrations.utilities.rag.settings import DEFAULT_RAG_PROMPT_TEMPLATE
 from mindsdb.integrations.handlers.langchain_handler.tools import setup_tools
 from mindsdb.integrations.libs.base import BaseMLEngine
 from mindsdb.interfaces.storage.model_fs import HandlerStorage, ModelStorage
-from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
-from mindsdb.integrations.handlers.openai_handler.constants import CHAT_MODELS  # noqa, for dependency checker
+from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
+    construct_model_from_args,
+)
+from mindsdb.integrations.handlers.openai_handler.constants import CHAT_MODELS_PREFIXES  # noqa: F401 - for dependency checker

 from mindsdb.utilities import log
 from mindsdb.utilities.context_executor import ContextThreadPoolExecutor

-_PARSING_ERROR_PREFIXES = ['An output parsing error occured', 'Could not parse LLM output']
+_PARSING_ERROR_PREFIXES = ["An output parsing error occured", "Could not parse LLM output"]

 logger = log.getLogger(__name__)

@@ -49,7 +54,6 @@ class LangChainHandler(BaseMLEngine):
     Supported LLM providers:
     - OpenAI
     - Anthropic
-    - Anyscale
     - Google
     - LiteLLM
     - Ollama
@@ -58,13 +62,10 @@
     - python_repl
     - serper.dev search
     """
-    name = 'langchain'

-    def __init__(
-        self,
-        model_storage: ModelStorage,
-        engine_storage: HandlerStorage,
-        **kwargs):
+    name = "langchain"
+
+    def __init__(self, model_storage: ModelStorage, engine_storage: HandlerStorage, **kwargs):
         super().__init__(model_storage, engine_storage, **kwargs)
         # if True, the target column name does not have to be specified at creation time.
         self.generative = True
@@ -81,77 +82,78 @@
                 #
                 # Ideally, in the future, we would write a parser that is more robust and flexible than the one Langchain uses.
                 # Response is wrapped in ``
-                logger.info('Handling parsing error, salvaging response...')
-                response_output = response.split('`')
+                logger.info("Handling parsing error, salvaging response...")
+                response_output = response.split("`")
                 if len(response_output) >= 2:
                     response = response_output[-2]

                 # Wrap response in Langchain conversational react format.
-                langchain_react_formatted_response = f'''Thought: Do I need to use a tool? No
-AI: {response}'''
+                langchain_react_formatted_response = f"""Thought: Do I need to use a tool? No
+AI: {response}"""
                 return langchain_react_formatted_response
-        return f'Agent failed with error:\n{str(error)}...'
+        return f"Agent failed with error:\n{str(error)}..."

     def create(self, target: str, args: Dict = None, **kwargs):
-        self.default_agent_tools = args.get('tools', self.default_agent_tools)
-
-        args = args['using']
-        args['target'] = target
-        args['model_name'] = args.get('model_name', DEFAULT_MODEL_NAME)
-        args['provider'] = args.get('provider', get_llm_provider(args))
-        args['embedding_model_provider'] = args.get('embedding_model', get_embedding_model_provider(args))
-        if args.get('mode') == 'retrieval':
+        self.default_agent_tools = args.get("tools", self.default_agent_tools)
+
+        args = args["using"]
+        args["target"] = target
+        args["model_name"] = args.get("model_name", DEFAULT_MODEL_NAME)
+        args["provider"] = args.get("provider", get_llm_provider(args))
+        args["embedding_model_provider"] = args.get("embedding_model", get_embedding_model_provider(args))
+        if args.get("mode") == "retrieval":
             # use default prompt template for retrieval i.e. RAG if not provided
             if "prompt_template" not in args:
                 args["prompt_template"] = DEFAULT_RAG_PROMPT_TEMPLATE

-        self.model_storage.json_set('args', args)
+        self.model_storage.json_set("args", args)

     @staticmethod
     def create_validation(_, args: Dict = None, **kwargs):
-        if 'using' not in args:
+        if "using" not in args:
             raise Exception("LangChain engine requires a USING clause! Refer to its documentation for more details.")
         else:
-            args = args['using']
-            if 'prompt_template' not in args:
-                if not args.get('mode') == 'retrieval':
-                    raise ValueError('Please provide a `prompt_template` for this engine.')
+            args = args["using"]
+            if "prompt_template" not in args:
+                if not args.get("mode") == "retrieval":
+                    raise ValueError("Please provide a `prompt_template` for this engine.")

     def predict(self, df: pd.DataFrame, args: Dict = None) -> pd.DataFrame:
         """
         Dispatch is performed depending on the underlying model type. Currently, only the default text completion
         is supported.
         """
-        pred_args = args['predict_params'] if args else {}
-        args = self.model_storage.json_get('args')
-        if 'prompt_template' not in args and 'prompt_template' not in pred_args:
+        pred_args = args["predict_params"] if args else {}
+        args = self.model_storage.json_get("args")
+        if "prompt_template" not in args and "prompt_template" not in pred_args:
             raise ValueError("This model expects a `prompt_template`, please provide one.")
         # Back compatibility for old models
-        args['provider'] = args.get('provider', get_llm_provider(args))
-        args['embedding_model_provider'] = args.get('embedding_model', get_embedding_model_provider(args))
+        args["provider"] = args.get("provider", get_llm_provider(args))
+        args["embedding_model_provider"] = args.get("embedding_model", get_embedding_model_provider(args))

         df = df.reset_index(drop=True)

-        if pred_args.get('mode') == 'chat_model':
+        if pred_args.get("mode") == "chat_model":
             return self.call_llm(df, args, pred_args)

         agent = self.create_agent(df, args, pred_args)
         # Use last message as prompt, remove other questions.
-        user_column = args.get('user_column', USER_COLUMN)
+        user_column = args.get("user_column", USER_COLUMN)
         if user_column not in df.columns:
             raise Exception(
-                f"Expected user input in column `{user_column}`, which is not found in the input data. Either provide the column, or redefine the expected column at model creation (`USING user_column = 'value'`)")  # noqa
+                f"Expected user input in column `{user_column}`, which is not found in the input data. Either provide the column, or redefine the expected column at model creation (`USING user_column = 'value'`)"
+            )  # noqa
         df.iloc[:-1, df.columns.get_loc(user_column)] = None
         return self.run_agent(df, agent, args, pred_args)

     def call_llm(self, df, args=None, pred_args=None):
         llm = create_chat_model({**args, **pred_args})

-        user_column = args.get('user_column', USER_COLUMN)
-        assistant_column = args.get('assistant_column', ASSISTANT_COLUMN)
+        user_column = args.get("user_column", USER_COLUMN)
+        assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)

         question = df[user_column].iloc[-1]
-        resp = llm([HumanMessage(question)], stop=['\nObservation:', '\n\tObservation:'])
+        resp = llm([HumanMessage(question)], stop=["\nObservation:", "\n\tObservation:"])

         return pd.DataFrame([resp.content], columns=[assistant_column])
@@ -162,25 +164,22 @@ AI: {response}'''
         model_kwargs = get_chat_model_params({**args, **pred_args})
         llm = create_chat_model({**args, **pred_args})

-        tools = setup_tools(llm,
-                            model_kwargs,
-                            pred_args,
-                            self.default_agent_tools)
+        tools = setup_tools(llm, model_kwargs, pred_args, self.default_agent_tools)

         # Prefer prediction prompt template over original if provided.
-        prompt_template = pred_args.get('prompt_template', args['prompt_template'])
-        if 'context' in pred_args:
-            prompt_template += '\n\n' + 'Useful information:\n' + pred_args['context'] + '\n'
+        prompt_template = pred_args.get("prompt_template", args["prompt_template"])
+        if "context" in pred_args:
+            prompt_template += "\n\n" + "Useful information:\n" + pred_args["context"] + "\n"

         # Set up memory.
-        memory = ConversationSummaryBufferMemory(llm=llm,
-                                                 max_token_limit=model_kwargs.get('max_tokens', DEFAULT_MAX_TOKENS),
-                                                 memory_key='chat_history')
+        memory = ConversationSummaryBufferMemory(
+            llm=llm, max_token_limit=model_kwargs.get("max_tokens", DEFAULT_MAX_TOKENS), memory_key="chat_history"
+        )
         memory.chat_memory.messages.insert(0, SystemMessage(content=prompt_template))
         # User - Assistant conversation. All except the last message.
-        user_column = args.get('user_column', USER_COLUMN)
-        assistant_column = args.get('assistant_column', ASSISTANT_COLUMN)
-        for row in df[:-1].to_dict('records'):
+        user_column = args.get("user_column", USER_COLUMN)
+        assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)
+        for row in df[:-1].to_dict("records"):
             question = row[user_column]
             answer = row[assistant_column]
             if question:
@@ -188,45 +187,47 @@ AI: {response}'''
             if answer:
                 memory.chat_memory.add_ai_message(answer)

-        agent_type = args.get('agent_type', DEFAULT_AGENT_TYPE)
+        agent_type = args.get("agent_type", DEFAULT_AGENT_TYPE)
         agent_executor = initialize_agent(
             tools,
             llm,
             agent=agent_type,
             # Use custom output parser to handle flaky LLMs that don't ALWAYS conform to output format.
-            agent_kwargs={'output_parser': SafeOutputParser()},
+            agent_kwargs={"output_parser": SafeOutputParser()},
             # Calls the agent’s LLM Chain one final time to generate a final answer based on the previous steps
-            early_stopping_method='generate',
+            early_stopping_method="generate",
             handle_parsing_errors=self._handle_parsing_errors,
             # Timeout per agent invocation.
-            max_execution_time=pred_args.get('timeout_seconds', args.get('timeout_seconds', DEFAULT_AGENT_TIMEOUT_SECONDS)),
-            max_iterations=pred_args.get('max_iterations', args.get('max_iterations', DEFAULT_MAX_ITERATIONS)),
+            max_execution_time=pred_args.get(
+                "timeout_seconds", args.get("timeout_seconds", DEFAULT_AGENT_TIMEOUT_SECONDS)
+            ),
+            max_iterations=pred_args.get("max_iterations", args.get("max_iterations", DEFAULT_MAX_ITERATIONS)),
             memory=memory,
-            verbose=pred_args.get('verbose', args.get('verbose', True))
+            verbose=pred_args.get("verbose", args.get("verbose", True)),
         )
         return agent_executor

     def run_agent(self, df: pd.DataFrame, agent: AgentExecutor, args: Dict, pred_args: Dict) -> pd.DataFrame:
         # Prefer prediction time prompt template, if available.
-        base_template = pred_args.get('prompt_template', args['prompt_template'])
+        base_template = pred_args.get("prompt_template", args["prompt_template"])

         input_variables = []
         matches = list(re.finditer("{{(.*?)}}", base_template))

         for m in matches:
-            input_variables.append(m[0].replace('{', '').replace('}', ''))
+            input_variables.append(m[0].replace("{", "").replace("}", ""))
         empty_prompt_ids = np.where(df[input_variables].isna().all(axis=1).values)[0]

-        base_template = base_template.replace('{{', '{').replace('}}', '}')
+        base_template = base_template.replace("{{", "{").replace("}}", "}")
         prompts = []

-        user_column = args.get('user_column', USER_COLUMN)
+        user_column = args.get("user_column", USER_COLUMN)
         for i, row in df.iterrows():
             if i not in empty_prompt_ids:
                 prompt = PromptTemplate(input_variables=input_variables, template=base_template)
                 kwargs = {}
                 for col in input_variables:
-                    kwargs[col] = row[col] if row[col] is not None else ''  # add empty quote if data is missing
+                    kwargs[col] = row[col] if row[col] is not None else ""  # add empty quote if data is missing
                 prompts.append(prompt.format(**kwargs))
             elif row.get(user_column):
                 # Just add prompt
234
235
 
235
236
  def _invoke_agent_executor_with_prompt(agent_executor, prompt):
236
237
  if not prompt:
237
- return ''
238
+ return ""
238
239
  try:
239
240
  answer = agent_executor.invoke(prompt)
240
241
  except Exception as e:
241
242
  answer = str(e)
242
243
  if not answer.startswith("Could not parse LLM output: `"):
243
244
  raise e
244
- answer = {'output': answer.removeprefix("Could not parse LLM output: `").removesuffix("`")}
245
+ answer = {"output": answer.removeprefix("Could not parse LLM output: `").removesuffix("`")}
245
246
 
246
- if 'output' not in answer:
247
+ if "output" not in answer:
247
248
  # This should never happen unless Langchain changes invoke output format, but just in case.
248
249
  return agent_executor.run(prompt)
249
- return answer['output']
250
+ return answer["output"]
250
251
 
251
252
  completions = []
252
253
  # max_workers defaults to number of processors on the machine multiplied by 5.
253
254
  # https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
254
- max_workers = args.get('max_workers', None)
255
- agent_timeout_seconds = args.get('timeout', DEFAULT_AGENT_TIMEOUT_SECONDS)
255
+ max_workers = args.get("max_workers", None)
256
+ agent_timeout_seconds = args.get("timeout", DEFAULT_AGENT_TIMEOUT_SECONDS)
256
257
  executor = ContextThreadPoolExecutor(max_workers=max_workers)
257
258
  futures = [executor.submit(_invoke_agent_executor_with_prompt, agent, prompt) for prompt in prompts]
258
259
  try:
259
260
  for future in as_completed(futures, timeout=agent_timeout_seconds):
260
261
  completions.append(future.result())
261
262
  except TimeoutError:
262
- completions.append("I'm sorry! I couldn't come up with a response in time. Please try again.")
263
+ completions.append(
264
+ f"I'm sorry! I couldn't generate a response within the allotted time ({agent_timeout_seconds} seconds). "
265
+ "If you need more time for processing, you can adjust the timeout settings. "
266
+ "Please refer to the documentation for instructions on how to change the timeout value. "
267
+ "Feel free to try your request again."
268
+ )
263
269
  # Can't use ThreadPoolExecutor as context manager since we need wait=False.
264
270
  executor.shutdown(wait=False)
265
271
 
@@ -267,13 +273,13 @@ AI: {response}'''
         for i in sorted(empty_prompt_ids)[:-1]:
             completions.insert(i, None)

-        pred_df = pd.DataFrame(completions, columns=[args['target']])
+        pred_df = pd.DataFrame(completions, columns=[args["target"]])

         return pred_df

     def describe(self, attribute: Optional[str] = None) -> pd.DataFrame:
-        tables = ['info']
-        return pd.DataFrame(tables, columns=['tables'])
+        tables = ["info"]
+        return pd.DataFrame(tables, columns=["tables"])

     def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
-        raise NotImplementedError('Fine-tuning is not supported for LangChain models')
+        raise NotImplementedError("Fine-tuning is not supported for LangChain models")
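
One behavioral note from the langchain_handler hunks above: the new timeout message interpolates agent_timeout_seconds, and create_agent() resolves the timeout with prediction-time arguments taking precedence over creation-time arguments, falling back to a library default. A minimal sketch of that resolution order (the 300-second default is an assumed value, not confirmed by this diff):

DEFAULT_AGENT_TIMEOUT_SECONDS = 300  # assumed default, not confirmed by this diff

def resolve_timeout(pred_args: dict, args: dict) -> int:
    # Mirrors the lookup chain passed to initialize_agent() in create_agent().
    return pred_args.get("timeout_seconds", args.get("timeout_seconds", DEFAULT_AGENT_TIMEOUT_SECONDS))

resolve_timeout({}, {})                                            # 300: default
resolve_timeout({}, {"timeout_seconds": 60})                       # 60: creation-time value
resolve_timeout({"timeout_seconds": 10}, {"timeout_seconds": 60})  # 10: prediction-time wins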
@@ -1,4 +1,4 @@
-lightwood>=25.5.2.2
-lightwood[extra]>=25.5.2.2
-lightwood[xai]>=25.5.2.2
-type_infer==0.0.22
+lightwood>=25.7.5.1
+lightwood[extra]>=25.7.5.1
+lightwood[xai]>=25.7.5.1
+type_infer==0.0.23
@@ -87,6 +87,7 @@ class LiteLLMHandler(BaseMLEngine):

         # check engine_storage for api_key
         input_args.update({k: v for k, v in ml_engine_args.items()})
+        input_args["target"] = target

         # validate args
         export_args = CompletionParameters(**input_args).model_dump()
@@ -104,6 +105,8 @@
         # validate args
         args = CompletionParameters(**input_args).model_dump()

+        target = args.pop("target")
+
         # build messages
         self._build_messages(args, df)
@@ -113,12 +116,12 @@
         if len(args["messages"]) > 1:
             # if more than one message, use batch completion
             responses = batch_completion(**args)
-            return pd.DataFrame({"result": [response.choices[0].message.content for response in responses]})
+            return pd.DataFrame({target: [response.choices[0].message.content for response in responses]})

         # run completion
         response = completion(**args)

-        return pd.DataFrame({"result": [response.choices[0].message.content]})
+        return pd.DataFrame({target: [response.choices[0].message.content]})

     @staticmethod
     def _prompt_to_messages(prompt: str, **kwargs) -> List[Dict]:
@@ -31,7 +31,8 @@ class CompletionParameters(BaseModel):
     # set api_base, api_version, api_key
     base_url: Optional[str] = None  # Base URL of the API.
     api_version: Optional[str] = None  # Version of the API to be used.
-    api_key: str  # API key for authentication.
+    api_key: Optional[str] = None  # API key for authentication.
+    target: Optional[str] = None  # the name of output column

     class Config:
         extra = Extra.forbid
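
Taken together, the three litellm hunks thread the model's target name from create() through CompletionParameters into predict(), so the output DataFrame column is named after the target instead of the hardcoded "result". A condensed sketch of that flow (simplified; the real code validates the arguments through the pydantic model above):

import pandas as pd

def build_output_frame(args: dict, contents: list) -> pd.DataFrame:
    # "target" is stashed into the args at CREATE MODEL time, popped back out
    # before the completion call, and used as the output column name.
    target = args.pop("target")
    return pd.DataFrame({target: contents})

build_output_frame({"target": "answer"}, ["hello"])  # column named "answer", not "result"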
@@ -1,38 +1,19 @@
-OPENAI_API_BASE = 'https://api.openai.com/v1'
+OPENAI_API_BASE = "https://api.openai.com/v1"

-CHAT_MODELS = (
-    'gpt-3.5-turbo',
-    'gpt-3.5-turbo-16k',
-    'gpt-3.5-turbo-instruct',
-    'gpt-4',
-    'gpt-4-32k',
-    'gpt-4-1106-preview',
-    'gpt-4-0125-preview',
-    'gpt-4o',
-    'o3-mini',
-    'o1-mini'
-)
-COMPLETION_MODELS = ('babbage-002', 'davinci-002')
-FINETUNING_MODELS = ('gpt-3.5-turbo', 'babbage-002', 'davinci-002', 'gpt-4')
-COMPLETION_LEGACY_BASE_MODELS = ('davinci', 'curie', 'babbage', 'ada')
-DEFAULT_CHAT_MODEL = 'gpt-3.5-turbo'
+CHAT_MODELS_PREFIXES = ("gpt-3.5", "gpt-3.5", "gpt-3.5", "gpt-4", "o3-mini", "o1-mini")
+COMPLETION_MODELS = ("babbage-002", "davinci-002")
+FINETUNING_MODELS = ("gpt-3.5-turbo", "babbage-002", "davinci-002", "gpt-4")
+COMPLETION_LEGACY_BASE_MODELS = ("davinci", "curie", "babbage", "ada")
+DEFAULT_CHAT_MODEL = "gpt-4o-mini"

 FINETUNING_LEGACY_MODELS = FINETUNING_MODELS
 COMPLETION_LEGACY_MODELS = (
     COMPLETION_LEGACY_BASE_MODELS
-    + tuple(f'text-{model}-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + ('text-davinci-002', 'text-davinci-003')
+    + tuple(f"text-{model}-001" for model in COMPLETION_LEGACY_BASE_MODELS)
+    + ("text-davinci-002", "text-davinci-003")
 )

-EMBEDDING_MODELS = (
-    ('text-embedding-ada-002',)
-    + tuple(f'text-similarity-{model}-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'text-search-{model}-query-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'text-search-{model}-doc-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'code-search-{model}-text-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'code-search-{model}-code-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-)
-DEFAULT_EMBEDDING_MODEL = 'text-embedding-ada-002'
+DEFAULT_EMBEDDING_MODEL = "text-embedding-ada-002"

-IMAGE_MODELS = ('dall-e-2', 'dall-e-3')
-DEFAULT_IMAGE_MODEL = 'dall-e-2'
+IMAGE_MODELS = ("dall-e-2", "dall-e-3")
+DEFAULT_IMAGE_MODEL = "dall-e-2"
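
Replacing the exhaustive CHAT_MODELS tuple with CHAT_MODELS_PREFIXES implies prefix-based detection, so newly released variants (such as the new default "gpt-4o-mini") match without the list being updated. A minimal sketch of that idea; the is_chat_model() helper is hypothetical (the diff only shows the constant), and the duplicated "gpt-3.5" entries are deduplicated here for clarity:

CHAT_MODELS_PREFIXES = ("gpt-3.5", "gpt-4", "o3-mini", "o1-mini")

def is_chat_model(model_name: str) -> bool:
    # str.startswith accepts a tuple of prefixes, so any model name that
    # begins with a known prefix is treated as a chat model.
    return model_name.startswith(CHAT_MODELS_PREFIXES)

is_chat_model("gpt-4o-mini")  # True: matches the "gpt-4" prefix
is_chat_model("davinci-002")  # False: a completion model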