MindsDB 25.7.3.0__py3-none-any.whl → 25.8.2.0__py3-none-any.whl
This diff shows the changes between two publicly released package versions, as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of MindsDB has been flagged as possibly problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +11 -1
- mindsdb/api/a2a/common/server/server.py +16 -6
- mindsdb/api/executor/command_executor.py +215 -150
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +14 -3
- mindsdb/api/executor/planner/plan_join.py +3 -0
- mindsdb/api/executor/planner/plan_join_ts.py +117 -100
- mindsdb/api/executor/planner/query_planner.py +1 -0
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +54 -85
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +21 -24
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +9 -3
- mindsdb/api/executor/sql_query/steps/subselect_step.py +11 -8
- mindsdb/api/executor/utilities/mysql_to_duckdb_functions.py +264 -0
- mindsdb/api/executor/utilities/sql.py +30 -0
- mindsdb/api/http/initialize.py +18 -44
- mindsdb/api/http/namespaces/agents.py +23 -20
- mindsdb/api/http/namespaces/chatbots.py +83 -120
- mindsdb/api/http/namespaces/file.py +1 -1
- mindsdb/api/http/namespaces/jobs.py +38 -60
- mindsdb/api/http/namespaces/tree.py +69 -61
- mindsdb/api/http/namespaces/views.py +56 -72
- mindsdb/api/mcp/start.py +2 -0
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +3 -2
- mindsdb/integrations/handlers/autogluon_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/autosklearn_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/bigquery_handler/bigquery_handler.py +25 -5
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +3 -3
- mindsdb/integrations/handlers/db2_handler/db2_handler.py +19 -23
- mindsdb/integrations/handlers/flaml_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/gong_handler/__about__.py +2 -0
- mindsdb/integrations/handlers/gong_handler/__init__.py +30 -0
- mindsdb/integrations/handlers/gong_handler/connection_args.py +37 -0
- mindsdb/integrations/handlers/gong_handler/gong_handler.py +164 -0
- mindsdb/integrations/handlers/gong_handler/gong_tables.py +508 -0
- mindsdb/integrations/handlers/gong_handler/icon.svg +25 -0
- mindsdb/integrations/handlers/gong_handler/test_gong_handler.py +125 -0
- mindsdb/integrations/handlers/google_calendar_handler/google_calendar_tables.py +82 -73
- mindsdb/integrations/handlers/hubspot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +8 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +203 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +360 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -7
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/langchain_handler/langchain_handler.py +83 -77
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +5 -2
- mindsdb/integrations/handlers/litellm_handler/settings.py +2 -1
- mindsdb/integrations/handlers/openai_handler/constants.py +11 -30
- mindsdb/integrations/handlers/openai_handler/helpers.py +27 -34
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +14 -12
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +106 -90
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +41 -39
- mindsdb/integrations/handlers/salesforce_handler/constants.py +215 -0
- mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +141 -80
- mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +0 -1
- mindsdb/integrations/handlers/tpot_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +32 -17
- mindsdb/integrations/handlers/web_handler/web_handler.py +19 -22
- mindsdb/integrations/libs/llm/config.py +0 -14
- mindsdb/integrations/libs/llm/utils.py +0 -15
- mindsdb/integrations/libs/vectordatabase_handler.py +10 -1
- mindsdb/integrations/utilities/files/file_reader.py +5 -19
- mindsdb/integrations/utilities/handler_utils.py +32 -12
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +1 -1
- mindsdb/interfaces/agents/agents_controller.py +246 -149
- mindsdb/interfaces/agents/constants.py +0 -1
- mindsdb/interfaces/agents/langchain_agent.py +11 -6
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +4 -4
- mindsdb/interfaces/database/database.py +38 -13
- mindsdb/interfaces/database/integrations.py +20 -5
- mindsdb/interfaces/database/projects.py +174 -23
- mindsdb/interfaces/database/views.py +86 -60
- mindsdb/interfaces/jobs/jobs_controller.py +103 -110
- mindsdb/interfaces/knowledge_base/controller.py +33 -6
- mindsdb/interfaces/knowledge_base/evaluate.py +2 -1
- mindsdb/interfaces/knowledge_base/executor.py +24 -0
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +6 -10
- mindsdb/interfaces/knowledge_base/preprocessing/text_splitter.py +73 -0
- mindsdb/interfaces/query_context/context_controller.py +111 -145
- mindsdb/interfaces/skills/skills_controller.py +18 -6
- mindsdb/interfaces/storage/db.py +40 -6
- mindsdb/interfaces/variables/variables_controller.py +8 -15
- mindsdb/utilities/config.py +5 -3
- mindsdb/utilities/fs.py +54 -17
- mindsdb/utilities/functions.py +72 -60
- mindsdb/utilities/log.py +38 -6
- mindsdb/utilities/ps.py +7 -7
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/METADATA +282 -268
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/RECORD +94 -92
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__about__.py +0 -9
- mindsdb/integrations/handlers/anyscale_endpoints_handler/__init__.py +0 -20
- mindsdb/integrations/handlers/anyscale_endpoints_handler/anyscale_endpoints_handler.py +0 -290
- mindsdb/integrations/handlers/anyscale_endpoints_handler/creation_args.py +0 -14
- mindsdb/integrations/handlers/anyscale_endpoints_handler/icon.svg +0 -4
- mindsdb/integrations/handlers/anyscale_endpoints_handler/requirements.txt +0 -2
- mindsdb/integrations/handlers/anyscale_endpoints_handler/settings.py +0 -51
- mindsdb/integrations/handlers/anyscale_endpoints_handler/tests/test_anyscale_endpoints_handler.py +0 -212
- /mindsdb/integrations/handlers/{anyscale_endpoints_handler/tests/__init__.py → gong_handler/requirements.txt} +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.7.3.0.dist-info → mindsdb-25.8.2.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/huggingface_handler/requirements.txt
@@ -1,7 +1,7 @@
-#
-
-
-
-
-
-
+# NOTE: Any changes made here need to be made to requirements_cpu.txt as well
+datasets==2.16.1
+evaluate==0.4.3
+nltk==3.9.1
+huggingface-hub==0.29.3
+torch==2.8.0
+transformers >= 4.42.4
mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt
@@ -1,7 +1,7 @@
-#
-
-
-
-
-
-
+# Needs to be installed with `pip install --extra-index-url https://download.pytorch.org/whl/ .[huggingface_cpu]`
+datasets==2.16.1
+evaluate==0.4.3
+nltk==3.9.1
+huggingface-hub==0.29.3
+torch==2.8.0+cpu
+transformers >= 4.42.4
mindsdb/integrations/handlers/huggingface_handler/__init__.py
@@ -1,27 +1,27 @@
-
-
-
-
-
-
-
-
+from mindsdb.integrations.handlers.huggingface_handler.finetune import (
+    _finetune_cls,
+    _finetune_fill_mask,
+    _finetune_question_answering,
+    _finetune_summarization,
+    _finetune_text_generation,
+    _finetune_translate,
+)
 
-#
-#
-#
-#
-#
-#
-#
-#
+# todo once we have moved predict tasks functions into a separate function
+# PREDICT_MAP = {
+#     'text-classification': self.predict_text_classification,
+#     'zero-shot-classification': self.predict_zero_shot,
+#     'translation': self.predict_translation,
+#     'summarization': self.predict_summarization,
+#     'fill-mask': self.predict_fill_mask
+# }
 
-
-
-
-
-
-
-
-
-
+FINETUNE_MAP = {
+    "text-classification": _finetune_cls,
+    "zero-shot-classification": _finetune_cls,
+    "translation": _finetune_translate,
+    "summarization": _finetune_summarization,
+    "fill-mask": _finetune_fill_mask,
+    "text-generation": _finetune_text_generation,
+    "question-answering": _finetune_question_answering,
+}
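For readers unfamiliar with this pattern: a task-to-function table like the new FINETUNE_MAP is typically consumed by a single dispatcher that looks up the task name and calls the matched routine. A minimal sketch of such a dispatcher (the `dispatch_finetune` helper and its signature are illustrative, not part of this diff):

def dispatch_finetune(task: str, *args, **kwargs):
    # Look up the fine-tuning routine registered for this task (illustrative helper);
    # fail fast with a clear error for unsupported tasks.
    finetune_fn = FINETUNE_MAP.get(task)
    if finetune_fn is None:
        raise ValueError(f"No fine-tuning routine registered for task: {task!r}")
    return finetune_fn(*args, **kwargs)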
mindsdb/integrations/handlers/langchain_handler/langchain_handler.py
@@ -13,7 +13,10 @@ import pandas as pd
 
 from mindsdb.interfaces.agents.safe_output_parser import SafeOutputParser
 from mindsdb.interfaces.agents.langchain_agent import (
-    get_llm_provider,
+    get_llm_provider,
+    get_embedding_model_provider,
+    create_chat_model,
+    get_chat_model_params,
 )
 
 from mindsdb.interfaces.agents.constants import (
@@ -24,19 +27,21 @@ from mindsdb.interfaces.agents.constants import (
     DEFAULT_MAX_TOKENS,
     DEFAULT_MODEL_NAME,
     USER_COLUMN,
-    ASSISTANT_COLUMN
+    ASSISTANT_COLUMN,
 )
 from mindsdb.integrations.utilities.rag.settings import DEFAULT_RAG_PROMPT_TEMPLATE
 from mindsdb.integrations.handlers.langchain_handler.tools import setup_tools
 from mindsdb.integrations.libs.base import BaseMLEngine
 from mindsdb.interfaces.storage.model_fs import HandlerStorage, ModelStorage
-from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import
-
+from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
+    construct_model_from_args,
+)
+from mindsdb.integrations.handlers.openai_handler.constants import CHAT_MODELS_PREFIXES  # noqa: F401 - for dependency checker
 
 from mindsdb.utilities import log
 from mindsdb.utilities.context_executor import ContextThreadPoolExecutor
 
-_PARSING_ERROR_PREFIXES = [
+_PARSING_ERROR_PREFIXES = ["An output parsing error occured", "Could not parse LLM output"]
 
 logger = log.getLogger(__name__)
 
@@ -49,7 +54,6 @@ class LangChainHandler(BaseMLEngine):
     Supported LLM providers:
     - OpenAI
     - Anthropic
-    - Anyscale
     - Google
     - LiteLLM
     - Ollama
@@ -58,13 +62,10 @@ class LangChainHandler(BaseMLEngine):
     - python_repl
     - serper.dev search
     """
-    name = 'langchain'
 
-
-
-
-                 engine_storage: HandlerStorage,
-                 **kwargs):
+    name = "langchain"
+
+    def __init__(self, model_storage: ModelStorage, engine_storage: HandlerStorage, **kwargs):
         super().__init__(model_storage, engine_storage, **kwargs)
         # if True, the target column name does not have to be specified at creation time.
         self.generative = True
@@ -81,77 +82,78 @@ class LangChainHandler(BaseMLEngine):
         #
         # Ideally, in the future, we would write a parser that is more robust and flexible than the one Langchain uses.
         # Response is wrapped in ``
-        logger.info(
-        response_output = response.split(
+        logger.info("Handling parsing error, salvaging response...")
+        response_output = response.split("`")
         if len(response_output) >= 2:
             response = response_output[-2]
 
         # Wrap response in Langchain conversational react format.
-        langchain_react_formatted_response = f
-AI: {response}
+        langchain_react_formatted_response = f"""Thought: Do I need to use a tool? No
+AI: {response}"""
         return langchain_react_formatted_response
-        return f
+        return f"Agent failed with error:\n{str(error)}..."
 
     def create(self, target: str, args: Dict = None, **kwargs):
-        self.default_agent_tools = args.get(
-
-        args = args[
-        args[
-        args[
-        args[
-        args[
-        if args.get(
+        self.default_agent_tools = args.get("tools", self.default_agent_tools)
+
+        args = args["using"]
+        args["target"] = target
+        args["model_name"] = args.get("model_name", DEFAULT_MODEL_NAME)
+        args["provider"] = args.get("provider", get_llm_provider(args))
+        args["embedding_model_provider"] = args.get("embedding_model", get_embedding_model_provider(args))
+        if args.get("mode") == "retrieval":
             # use default prompt template for retrieval i.e. RAG if not provided
             if "prompt_template" not in args:
                 args["prompt_template"] = DEFAULT_RAG_PROMPT_TEMPLATE
 
-        self.model_storage.json_set(
+        self.model_storage.json_set("args", args)
 
     @staticmethod
     def create_validation(_, args: Dict = None, **kwargs):
-        if
+        if "using" not in args:
             raise Exception("LangChain engine requires a USING clause! Refer to its documentation for more details.")
         else:
-            args = args[
-        if
-            if not args.get(
-                raise ValueError(
+            args = args["using"]
+        if "prompt_template" not in args:
+            if not args.get("mode") == "retrieval":
+                raise ValueError("Please provide a `prompt_template` for this engine.")
 
     def predict(self, df: pd.DataFrame, args: Dict = None) -> pd.DataFrame:
         """
         Dispatch is performed depending on the underlying model type. Currently, only the default text completion
         is supported.
         """
-        pred_args = args[
-        args = self.model_storage.json_get(
-        if
+        pred_args = args["predict_params"] if args else {}
+        args = self.model_storage.json_get("args")
+        if "prompt_template" not in args and "prompt_template" not in pred_args:
             raise ValueError("This model expects a `prompt_template`, please provide one.")
         # Back compatibility for old models
-        args[
-        args[
+        args["provider"] = args.get("provider", get_llm_provider(args))
+        args["embedding_model_provider"] = args.get("embedding_model", get_embedding_model_provider(args))
 
         df = df.reset_index(drop=True)
 
-        if pred_args.get(
+        if pred_args.get("mode") == "chat_model":
             return self.call_llm(df, args, pred_args)
 
         agent = self.create_agent(df, args, pred_args)
         # Use last message as prompt, remove other questions.
-        user_column = args.get(
+        user_column = args.get("user_column", USER_COLUMN)
         if user_column not in df.columns:
             raise Exception(
-                f"Expected user input in column `{user_column}`, which is not found in the input data. Either provide the column, or redefine the expected column at model creation (`USING user_column = 'value'`)"
+                f"Expected user input in column `{user_column}`, which is not found in the input data. Either provide the column, or redefine the expected column at model creation (`USING user_column = 'value'`)"
+            )  # noqa
         df.iloc[:-1, df.columns.get_loc(user_column)] = None
         return self.run_agent(df, agent, args, pred_args)
 
     def call_llm(self, df, args=None, pred_args=None):
         llm = create_chat_model({**args, **pred_args})
 
-        user_column = args.get(
-        assistant_column = args.get(
+        user_column = args.get("user_column", USER_COLUMN)
+        assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)
 
         question = df[user_column].iloc[-1]
-        resp = llm([HumanMessage(question)], stop=[
+        resp = llm([HumanMessage(question)], stop=["\nObservation:", "\n\tObservation:"])
 
         return pd.DataFrame([resp.content], columns=[assistant_column])
 
@@ -162,25 +164,22 @@ AI: {response}'''
         model_kwargs = get_chat_model_params({**args, **pred_args})
         llm = create_chat_model({**args, **pred_args})
 
-        tools = setup_tools(llm,
-                            model_kwargs,
-                            pred_args,
-                            self.default_agent_tools)
+        tools = setup_tools(llm, model_kwargs, pred_args, self.default_agent_tools)
 
         # Prefer prediction prompt template over original if provided.
-        prompt_template = pred_args.get(
-        if
-            prompt_template +=
+        prompt_template = pred_args.get("prompt_template", args["prompt_template"])
+        if "context" in pred_args:
+            prompt_template += "\n\n" + "Useful information:\n" + pred_args["context"] + "\n"
 
         # Set up memory.
-        memory = ConversationSummaryBufferMemory(
-
-
+        memory = ConversationSummaryBufferMemory(
+            llm=llm, max_token_limit=model_kwargs.get("max_tokens", DEFAULT_MAX_TOKENS), memory_key="chat_history"
+        )
         memory.chat_memory.messages.insert(0, SystemMessage(content=prompt_template))
         # User - Assistant conversation. All except the last message.
-        user_column = args.get(
-        assistant_column = args.get(
-        for row in df[:-1].to_dict(
+        user_column = args.get("user_column", USER_COLUMN)
+        assistant_column = args.get("assistant_column", ASSISTANT_COLUMN)
+        for row in df[:-1].to_dict("records"):
             question = row[user_column]
             answer = row[assistant_column]
             if question:
@@ -188,45 +187,47 @@ AI: {response}'''
             if answer:
                 memory.chat_memory.add_ai_message(answer)
 
-        agent_type = args.get(
+        agent_type = args.get("agent_type", DEFAULT_AGENT_TYPE)
         agent_executor = initialize_agent(
             tools,
             llm,
             agent=agent_type,
             # Use custom output parser to handle flaky LLMs that don't ALWAYS conform to output format.
-            agent_kwargs={
+            agent_kwargs={"output_parser": SafeOutputParser()},
            # Calls the agent’s LLM Chain one final time to generate a final answer based on the previous steps
-            early_stopping_method=
+            early_stopping_method="generate",
             handle_parsing_errors=self._handle_parsing_errors,
             # Timeout per agent invocation.
-            max_execution_time=pred_args.get(
-
+            max_execution_time=pred_args.get(
+                "timeout_seconds", args.get("timeout_seconds", DEFAULT_AGENT_TIMEOUT_SECONDS)
+            ),
+            max_iterations=pred_args.get("max_iterations", args.get("max_iterations", DEFAULT_MAX_ITERATIONS)),
             memory=memory,
-            verbose=pred_args.get(
+            verbose=pred_args.get("verbose", args.get("verbose", True)),
         )
         return agent_executor
 
     def run_agent(self, df: pd.DataFrame, agent: AgentExecutor, args: Dict, pred_args: Dict) -> pd.DataFrame:
         # Prefer prediction time prompt template, if available.
-        base_template = pred_args.get(
+        base_template = pred_args.get("prompt_template", args["prompt_template"])
 
         input_variables = []
         matches = list(re.finditer("{{(.*?)}}", base_template))
 
         for m in matches:
-            input_variables.append(m[0].replace(
+            input_variables.append(m[0].replace("{", "").replace("}", ""))
         empty_prompt_ids = np.where(df[input_variables].isna().all(axis=1).values)[0]
 
-        base_template = base_template.replace(
+        base_template = base_template.replace("{{", "{").replace("}}", "}")
         prompts = []
 
-        user_column = args.get(
+        user_column = args.get("user_column", USER_COLUMN)
        for i, row in df.iterrows():
             if i not in empty_prompt_ids:
                 prompt = PromptTemplate(input_variables=input_variables, template=base_template)
                 kwargs = {}
                 for col in input_variables:
-                    kwargs[col] = row[col] if row[col] is not None else
+                    kwargs[col] = row[col] if row[col] is not None else ""  # add empty quote if data is missing
                 prompts.append(prompt.format(**kwargs))
             elif row.get(user_column):
                 # Just add prompt
@@ -234,32 +235,37 @@ AI: {response}'''
 
         def _invoke_agent_executor_with_prompt(agent_executor, prompt):
             if not prompt:
-                return
+                return ""
             try:
                 answer = agent_executor.invoke(prompt)
             except Exception as e:
                 answer = str(e)
                 if not answer.startswith("Could not parse LLM output: `"):
                     raise e
-                answer = {
+                answer = {"output": answer.removeprefix("Could not parse LLM output: `").removesuffix("`")}
 
-            if
+            if "output" not in answer:
                 # This should never happen unless Langchain changes invoke output format, but just in case.
                 return agent_executor.run(prompt)
-            return answer[
+            return answer["output"]
 
         completions = []
         # max_workers defaults to number of processors on the machine multiplied by 5.
         # https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
-        max_workers = args.get(
-        agent_timeout_seconds = args.get(
+        max_workers = args.get("max_workers", None)
+        agent_timeout_seconds = args.get("timeout", DEFAULT_AGENT_TIMEOUT_SECONDS)
         executor = ContextThreadPoolExecutor(max_workers=max_workers)
         futures = [executor.submit(_invoke_agent_executor_with_prompt, agent, prompt) for prompt in prompts]
         try:
             for future in as_completed(futures, timeout=agent_timeout_seconds):
                 completions.append(future.result())
         except TimeoutError:
-            completions.append(
+            completions.append(
+                f"I'm sorry! I couldn't generate a response within the allotted time ({agent_timeout_seconds} seconds). "
+                "If you need more time for processing, you can adjust the timeout settings. "
+                "Please refer to the documentation for instructions on how to change the timeout value. "
+                "Feel free to try your request again."
+            )
         # Can't use ThreadPoolExecutor as context manager since we need wait=False.
         executor.shutdown(wait=False)
 
@@ -267,13 +273,13 @@ AI: {response}'''
         for i in sorted(empty_prompt_ids)[:-1]:
             completions.insert(i, None)
 
-        pred_df = pd.DataFrame(completions, columns=[args[
+        pred_df = pd.DataFrame(completions, columns=[args["target"]])
 
         return pred_df
 
     def describe(self, attribute: Optional[str] = None) -> pd.DataFrame:
-        tables = [
-        return pd.DataFrame(tables, columns=[
+        tables = ["info"]
+        return pd.DataFrame(tables, columns=["tables"])
 
     def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
-        raise NotImplementedError(
+        raise NotImplementedError("Fine-tuning is not supported for LangChain models")
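The parsing-error salvage in `_handle_parsing_errors` above relies on LangChain wrapping the unparsed model output in backticks, so splitting on the backtick and taking the second-to-last piece recovers the raw response. A standalone sketch of that behavior (the sample string is illustrative):

# Illustrative input; real errors come from LangChain's output parser.
error_text = "Could not parse LLM output: `The answer is 42`"
parts = error_text.split("`")  # ['Could not parse LLM output: ', 'The answer is 42', '']
if len(parts) >= 2:
    salvaged = parts[-2]       # 'The answer is 42'
print(salvaged)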
mindsdb/integrations/handlers/lightwood_handler/requirements.txt
@@ -1,4 +1,4 @@
-lightwood>=25.5.
-lightwood[extra]>=25.5.
-lightwood[xai]>=25.5.
-type_infer==0.0.
+lightwood>=25.7.5.1
+lightwood[extra]>=25.7.5.1
+lightwood[xai]>=25.7.5.1
+type_infer==0.0.23
mindsdb/integrations/handlers/litellm_handler/litellm_handler.py
@@ -87,6 +87,7 @@ class LiteLLMHandler(BaseMLEngine):
 
         # check engine_storage for api_key
         input_args.update({k: v for k, v in ml_engine_args.items()})
+        input_args["target"] = target
 
         # validate args
         export_args = CompletionParameters(**input_args).model_dump()
@@ -104,6 +105,8 @@ class LiteLLMHandler(BaseMLEngine):
         # validate args
         args = CompletionParameters(**input_args).model_dump()
 
+        target = args.pop("target")
+
         # build messages
         self._build_messages(args, df)
 
@@ -113,12 +116,12 @@ class LiteLLMHandler(BaseMLEngine):
         if len(args["messages"]) > 1:
             # if more than one message, use batch completion
             responses = batch_completion(**args)
-            return pd.DataFrame({
+            return pd.DataFrame({target: [response.choices[0].message.content for response in responses]})
 
         # run completion
         response = completion(**args)
 
-        return pd.DataFrame({
+        return pd.DataFrame({target: [response.choices[0].message.content]})
 
     @staticmethod
     def _prompt_to_messages(prompt: str, **kwargs) -> List[Dict]:
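The hunks above thread the model's target column name through the stored arguments: `create` saves it into the validated args, and `predict` pops it back out so the response DataFrame can be named after it (popping also keeps `target` out of the kwargs forwarded to litellm's `completion`). A minimal standalone sketch of that hand-off, with illustrative values:

import pandas as pd

# At CREATE time the handler stores the target alongside other args (illustrative values).
stored_args = {"model": "gpt-4o-mini", "target": "answer"}

# At PREDICT time the target is popped so it is not forwarded to completion(**args).
target = stored_args.pop("target")
contents = ["Hello!"]  # stand-in for [response.choices[0].message.content]
result = pd.DataFrame({target: contents})  # output column carries the model's target name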
mindsdb/integrations/handlers/litellm_handler/settings.py
@@ -31,7 +31,8 @@ class CompletionParameters(BaseModel):
     # set api_base, api_version, api_key
     base_url: Optional[str] = None  # Base URL of the API.
     api_version: Optional[str] = None  # Version of the API to be used.
-    api_key: str  # API key for authentication.
+    api_key: Optional[str] = None  # API key for authentication.
+    target: Optional[str] = None  # the name of output column
 
     class Config:
         extra = Extra.forbid
mindsdb/integrations/handlers/openai_handler/constants.py
@@ -1,38 +1,19 @@
-OPENAI_API_BASE =
+OPENAI_API_BASE = "https://api.openai.com/v1"
 
-
-
-
-
-
-    'gpt-4-32k',
-    'gpt-4-1106-preview',
-    'gpt-4-0125-preview',
-    'gpt-4o',
-    'o3-mini',
-    'o1-mini'
-)
-COMPLETION_MODELS = ('babbage-002', 'davinci-002')
-FINETUNING_MODELS = ('gpt-3.5-turbo', 'babbage-002', 'davinci-002', 'gpt-4')
-COMPLETION_LEGACY_BASE_MODELS = ('davinci', 'curie', 'babbage', 'ada')
-DEFAULT_CHAT_MODEL = 'gpt-3.5-turbo'
+CHAT_MODELS_PREFIXES = ("gpt-3.5", "gpt-3.5", "gpt-3.5", "gpt-4", "o3-mini", "o1-mini")
+COMPLETION_MODELS = ("babbage-002", "davinci-002")
+FINETUNING_MODELS = ("gpt-3.5-turbo", "babbage-002", "davinci-002", "gpt-4")
+COMPLETION_LEGACY_BASE_MODELS = ("davinci", "curie", "babbage", "ada")
+DEFAULT_CHAT_MODEL = "gpt-4o-mini"
 
 FINETUNING_LEGACY_MODELS = FINETUNING_MODELS
 COMPLETION_LEGACY_MODELS = (
     COMPLETION_LEGACY_BASE_MODELS
-    + tuple(f
-    + (
+    + tuple(f"text-{model}-001" for model in COMPLETION_LEGACY_BASE_MODELS)
+    + ("text-davinci-002", "text-davinci-003")
 )
 
-
-    ('text-embedding-ada-002',)
-    + tuple(f'text-similarity-{model}-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'text-search-{model}-query-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'text-search-{model}-doc-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'code-search-{model}-text-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-    + tuple(f'code-search-{model}-code-001' for model in COMPLETION_LEGACY_BASE_MODELS)
-)
-DEFAULT_EMBEDDING_MODEL = 'text-embedding-ada-002'
+DEFAULT_EMBEDDING_MODEL = "text-embedding-ada-002"
 
-IMAGE_MODELS = (
-DEFAULT_IMAGE_MODEL =
+IMAGE_MODELS = ("dall-e-2", "dall-e-3")
+DEFAULT_IMAGE_MODEL = "dall-e-2"