alita-sdk 0.3.390__py3-none-any.whl → 0.3.417__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/client.py +3 -2
- alita_sdk/runtime/langchain/assistant.py +29 -5
- alita_sdk/runtime/langchain/constants.py +2 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +8 -8
- alita_sdk/runtime/langchain/langraph_agent.py +46 -24
- alita_sdk/runtime/langchain/utils.py +11 -4
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/tools.py +72 -62
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +11 -4
- alita_sdk/runtime/tools/llm.py +142 -116
- alita_sdk/runtime/tools/sandbox.py +15 -31
- alita_sdk/tools/__init__.py +41 -31
- alita_sdk/tools/base_indexer_toolkit.py +27 -2
- alita_sdk/tools/code_indexer_toolkit.py +13 -3
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/gitlab/api_wrapper.py +8 -9
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/qtest/api_wrapper.py +7 -10
- alita_sdk/tools/sharepoint/api_wrapper.py +81 -28
- alita_sdk/tools/sharepoint/authorization_helper.py +131 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/utils/content_parser.py +27 -16
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +10 -2
- {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/RECORD +39 -39
- {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/llm.py
CHANGED
```diff
@@ -7,6 +7,7 @@ from langchain_core.runnables import RunnableConfig
 from langchain_core.tools import BaseTool, ToolException
 from pydantic import Field
 
+from ..langchain.constants import ELITEA_RS
 from ..langchain.utils import create_pydantic_model, propagate_the_input_mapping
 
 logger = logging.getLogger(__name__)
@@ -30,6 +31,7 @@ class LLMNode(BaseTool):
     structured_output: Optional[bool] = Field(default=False, description='Whether to use structured output')
     available_tools: Optional[List[BaseTool]] = Field(default=None, description='Available tools for binding')
     tool_names: Optional[List[str]] = Field(default=None, description='Specific tool names to filter')
+    steps_limit: Optional[int] = Field(default=25, description='Maximum steps for tool execution')
 
     def get_filtered_tools(self) -> List[BaseTool]:
         """
@@ -88,8 +90,7 @@ class LLMNode(BaseTool):
                 raise ToolException(f"LLMNode requires 'system' and 'task' parameters in input mapping. "
                                     f"Actual params: {func_args}")
             # cast to str in case user passes variable different from str
-            messages = [SystemMessage(content=str(func_args.get('system'))), HumanMessage(content=str(func_args.get('task')))]
-            messages.extend(func_args.get('chat_history', []))
+            messages = [SystemMessage(content=str(func_args.get('system'))), *func_args.get('chat_history', []), HumanMessage(content=str(func_args.get('task')))]
         else:
             # Flow for chat-based LLM node w/o prompt/task from pipeline but with messages in state
             # verify messages structure
@@ -122,14 +123,25 @@
                     }
                     for key, value in (self.structured_output_dict or {}).items()
                 }
+                # Add default output field for proper response to user
+                struct_params['elitea_response'] = {'description': 'final output to user', 'type': 'str'}
                 struct_model = create_pydantic_model(f"LLMOutput", struct_params)
-
-
-
+                completion = llm_client.invoke(messages, config=config)
+                if hasattr(completion, 'tool_calls') and completion.tool_calls:
+                    new_messages, _ = self.__perform_tool_calling(completion, messages, llm_client, config)
+                    llm = self.__get_struct_output_model(llm_client, struct_model)
+                    completion = llm.invoke(new_messages, config=config)
+                    result = completion.model_dump()
+                else:
+                    llm = self.__get_struct_output_model(llm_client, struct_model)
+                    completion = llm.invoke(messages, config=config)
+                    result = completion.model_dump()
 
                 # Ensure messages are properly formatted
                 if result.get('messages') and isinstance(result['messages'], list):
                     result['messages'] = [{'role': 'assistant', 'content': '\n'.join(result['messages'])}]
+                else:
+                    result['messages'] = messages + [AIMessage(content=result.get(ELITEA_RS, ''))]
 
                 return result
             else:
@@ -139,117 +151,15 @@
             # Handle both tool-calling and regular responses
             if hasattr(completion, 'tool_calls') and completion.tool_calls:
                 # Handle iterative tool-calling and execution
-                new_messages = messages
-                max_iterations = 15
-                iteration = 0
-
-                # Continue executing tools until no more tool calls or max iterations reached
-                current_completion = completion
-                while (hasattr(current_completion, 'tool_calls') and
-                       current_completion.tool_calls and
-                       iteration < max_iterations):
-
-                    iteration += 1
-                    logger.info(f"Tool execution iteration {iteration}/{max_iterations}")
-
-                    # Execute each tool call in the current completion
-                    tool_calls = current_completion.tool_calls if hasattr(current_completion.tool_calls,
-                                                                          '__iter__') else []
-
-                    for tool_call in tool_calls:
-                        tool_name = tool_call.get('name', '') if isinstance(tool_call, dict) else getattr(tool_call,
-                                                                                                          'name',
-                                                                                                          '')
-                        tool_args = tool_call.get('args', {}) if isinstance(tool_call, dict) else getattr(tool_call,
-                                                                                                          'args',
-                                                                                                          {})
-                        tool_call_id = tool_call.get('id', '') if isinstance(tool_call, dict) else getattr(
-                            tool_call, 'id', '')
-
-                        # Find the tool in filtered tools
-                        filtered_tools = self.get_filtered_tools()
-                        tool_to_execute = None
-                        for tool in filtered_tools:
-                            if tool.name == tool_name:
-                                tool_to_execute = tool
-                                break
-
-                        if tool_to_execute:
-                            try:
-                                logger.info(f"Executing tool '{tool_name}' with args: {tool_args}")
-                                # Pass the underlying config to the tool execution invoke method
-                                # since it may be another agent, graph, etc. to see it properly in thinking steps
-                                tool_result = tool_to_execute.invoke(tool_args, config=config)
-
-                                # Create tool message with result - preserve structured content
-                                from langchain_core.messages import ToolMessage
-
-                                # Check if tool_result is structured content (list of dicts)
-                                # TODO: need solid check for being compatible with ToolMessage content format
-                                if isinstance(tool_result, list) and all(
-                                    isinstance(item, dict) and 'type' in item for item in tool_result
-                                ):
-                                    # Use structured content directly for multimodal support
-                                    tool_message = ToolMessage(
-                                        content=tool_result,
-                                        tool_call_id=tool_call_id
-                                    )
-                                else:
-                                    # Fallback to string conversion for other tool results
-                                    tool_message = ToolMessage(
-                                        content=str(tool_result),
-                                        tool_call_id=tool_call_id
-                                    )
-                                new_messages.append(tool_message)
-
-                            except Exception as e:
-                                logger.error(f"Error executing tool '{tool_name}': {e}")
-                                # Create error tool message
-                                from langchain_core.messages import ToolMessage
-                                tool_message = ToolMessage(
-                                    content=f"Error executing {tool_name}: {str(e)}",
-                                    tool_call_id=tool_call_id
-                                )
-                                new_messages.append(tool_message)
-                        else:
-                            logger.warning(f"Tool '{tool_name}' not found in available tools")
-                            # Create error tool message for missing tool
-                            from langchain_core.messages import ToolMessage
-                            tool_message = ToolMessage(
-                                content=f"Tool '{tool_name}' not available",
-                                tool_call_id=tool_call_id
-                            )
-                            new_messages.append(tool_message)
-
-                    # Call LLM again with tool results to get next response
-                    try:
-                        current_completion = llm_client.invoke(new_messages, config=config)
-                        new_messages.append(current_completion)
-
-                        # Check if we still have tool calls
-                        if hasattr(current_completion, 'tool_calls') and current_completion.tool_calls:
-                            logger.info(f"LLM requested {len(current_completion.tool_calls)} more tool calls")
-                        else:
-                            logger.info("LLM completed without requesting more tools")
-                            break
-
-                    except Exception as e:
-                        logger.error(f"Error in LLM call during iteration {iteration}: {e}")
-                        # Add error message and break the loop
-                        error_msg = f"Error processing tool results in iteration {iteration}: {str(e)}"
-                        new_messages.append(AIMessage(content=error_msg))
-                        break
-
-                # Log completion status
-                if iteration >= max_iterations:
-                    logger.warning(f"Reached maximum iterations ({max_iterations}) for tool execution")
-                    # Add a warning message to the chat
-                    warning_msg = f"Maximum tool execution iterations ({max_iterations}) reached. Stopping tool execution."
-                    new_messages.append(AIMessage(content=warning_msg))
-                else:
-                    logger.info(f"Tool execution completed after {iteration} iterations")
+                new_messages, current_completion = self.__perform_tool_calling(completion, messages, llm_client, config)
 
-
+                output_msgs = {"messages": new_messages}
+                if self.output_variables:
+                    if self.output_variables[0] == 'messages':
+                        return output_msgs
+                    output_msgs[self.output_variables[0]] = current_completion.content if current_completion else None
+
+                return output_msgs
             else:
                 # Regular text response
                 content = completion.content.strip() if hasattr(completion, 'content') else str(completion)
@@ -275,4 +185,120 @@
 
     def _run(self, *args, **kwargs):
         # Legacy support for old interface
-        return self.invoke(kwargs, **kwargs)
+        return self.invoke(kwargs, **kwargs)
+
+    def __perform_tool_calling(self, completion, messages, llm_client, config):
+        # Handle iterative tool-calling and execution
+        new_messages = messages + [completion]
+        iteration = 0
+
+        # Continue executing tools until no more tool calls or max iterations reached
+        current_completion = completion
+        while (hasattr(current_completion, 'tool_calls') and
+               current_completion.tool_calls and
+               iteration < self.steps_limit):
+
+            iteration += 1
+            logger.info(f"Tool execution iteration {iteration}/{self.steps_limit}")
+
+            # Execute each tool call in the current completion
+            tool_calls = current_completion.tool_calls if hasattr(current_completion.tool_calls,
+                                                                  '__iter__') else []
+
+            for tool_call in tool_calls:
+                tool_name = tool_call.get('name', '') if isinstance(tool_call, dict) else getattr(tool_call,
+                                                                                                  'name', '')
+                tool_args = tool_call.get('args', {}) if isinstance(tool_call, dict) else getattr(tool_call,
+                                                                                                  'args', {})
+                tool_call_id = tool_call.get('id', '') if isinstance(tool_call, dict) else getattr(
+                    tool_call, 'id', '')
+
+                # Find the tool in filtered tools
+                filtered_tools = self.get_filtered_tools()
+                tool_to_execute = None
+                for tool in filtered_tools:
+                    if tool.name == tool_name:
+                        tool_to_execute = tool
+                        break
+
+                if tool_to_execute:
+                    try:
+                        logger.info(f"Executing tool '{tool_name}' with args: {tool_args}")
+                        # Pass the underlying config to the tool execution invoke method
+                        # since it may be another agent, graph, etc. to see it properly in thinking steps
+                        tool_result = tool_to_execute.invoke(tool_args, config=config)
+
+                        # Create tool message with result - preserve structured content
+                        from langchain_core.messages import ToolMessage
+
+                        # Check if tool_result is structured content (list of dicts)
+                        # TODO: need solid check for being compatible with ToolMessage content format
+                        if isinstance(tool_result, list) and all(
+                            isinstance(item, dict) and 'type' in item for item in tool_result
+                        ):
+                            # Use structured content directly for multimodal support
+                            tool_message = ToolMessage(
+                                content=tool_result,
+                                tool_call_id=tool_call_id
+                            )
+                        else:
+                            # Fallback to string conversion for other tool results
+                            tool_message = ToolMessage(
+                                content=str(tool_result),
+                                tool_call_id=tool_call_id
+                            )
+                        new_messages.append(tool_message)
+
+                    except Exception as e:
+                        logger.error(f"Error executing tool '{tool_name}': {e}")
+                        # Create error tool message
+                        from langchain_core.messages import ToolMessage
+                        tool_message = ToolMessage(
+                            content=f"Error executing {tool_name}: {str(e)}",
+                            tool_call_id=tool_call_id
+                        )
+                        new_messages.append(tool_message)
+                else:
+                    logger.warning(f"Tool '{tool_name}' not found in available tools")
+                    # Create error tool message for missing tool
+                    from langchain_core.messages import ToolMessage
+                    tool_message = ToolMessage(
+                        content=f"Tool '{tool_name}' not available",
+                        tool_call_id=tool_call_id
+                    )
+                    new_messages.append(tool_message)
+
+            # Call LLM again with tool results to get next response
+            try:
+                current_completion = llm_client.invoke(new_messages, config=config)
+                new_messages.append(current_completion)
+
+                # Check if we still have tool calls
+                if hasattr(current_completion, 'tool_calls') and current_completion.tool_calls:
+                    logger.info(f"LLM requested {len(current_completion.tool_calls)} more tool calls")
+                else:
+                    logger.info("LLM completed without requesting more tools")
+                    break
+
+            except Exception as e:
+                logger.error(f"Error in LLM call during iteration {iteration}: {e}")
+                # Add error message and break the loop
+                error_msg = f"Error processing tool results in iteration {iteration}: {str(e)}"
+                new_messages.append(AIMessage(content=error_msg))
+                break
+
+        # Log completion status
+        if iteration >= self.steps_limit:
+            logger.warning(f"Reached maximum iterations ({self.steps_limit}) for tool execution")
+            # Add a warning message to the chat
+            warning_msg = f"Maximum tool execution iterations ({self.steps_limit}) reached. Stopping tool execution."
+            new_messages.append(AIMessage(content=warning_msg))
+        else:
+            logger.info(f"Tool execution completed after {iteration} iterations")
+
+        return new_messages, current_completion
+
+    def __get_struct_output_model(self, llm_client, pydantic_model):
+        return llm_client.with_structured_output(pydantic_model)
```
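Three behavioral changes are visible above: chat history is now spliced between the system and task messages, the inlined tool loop moved into `__perform_tool_calling` bounded by the configurable `steps_limit` field (default 25, replacing the hard-coded `max_iterations = 15`), and the structured-output branch now resolves tool calls before the final `with_structured_output` pass. A minimal self-contained sketch of the same bounded loop, assuming a LangChain chat model with tools attached via `bind_tools`; the `word_count` tool and the commented-out model are illustrative, not part of the SDK:

```python
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage
from langchain_core.tools import tool


@tool
def word_count(text: str) -> int:
    """Count words in a text snippet."""
    return len(text.split())


def run_with_tools(llm, messages, tools, steps_limit: int = 25):
    """Invoke the model, executing requested tool calls until it stops or the step budget is spent."""
    tools_by_name = {t.name: t for t in tools}
    completion = llm.invoke(messages)
    history = messages + [completion]
    for _ in range(steps_limit):
        if not getattr(completion, "tool_calls", None):
            break  # model produced a final answer, no more tool calls
        for call in completion.tool_calls:
            tool_fn = tools_by_name.get(call["name"])
            # Missing tools become an error ToolMessage instead of crashing the loop
            content = tool_fn.invoke(call["args"]) if tool_fn else f"Tool '{call['name']}' not available"
            history.append(ToolMessage(content=str(content), tool_call_id=call["id"]))
        completion = llm.invoke(history)  # feed tool results back to the model
        history.append(completion)
    return history, completion


# Usage sketch (hypothetical model object):
# llm = ChatOpenAI(model="gpt-4o").bind_tools([word_count])
# history, final = run_with_tools(
#     llm,
#     [SystemMessage(content="Be terse."), HumanMessage(content="How many words in 'a b c'?")],
#     [word_count], steps_limit=5)
```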
alita_sdk/runtime/tools/sandbox.py
CHANGED

```diff
@@ -64,36 +64,10 @@ def _is_deno_available() -> bool:
 
 
 def _setup_pyodide_cache_env() -> None:
-    """Setup Pyodide caching environment variables for performance optimization"""
+    """Setup Pyodide caching environment variables for performance optimization [NO-OP]"""
     try:
-
-
-        if os.path.exists(cache_env_file):
-            with open(cache_env_file, 'r') as f:
-                for line in f:
-                    line = line.strip()
-                    if line.startswith('export ') and '=' in line:
-                        # Parse export VAR=value format
-                        var_assignment = line[7:]  # Remove 'export '
-                        if '=' in var_assignment:
-                            key, value = var_assignment.split('=', 1)
-                            # Remove quotes if present
-                            value = value.strip('"').strip("'")
-                            os.environ[key] = value
-                            logger.debug(f"Set Pyodide cache env: {key}={value}")
-
-        # Set default caching environment variables if not already set
-        cache_defaults = {
-            'PYODIDE_PACKAGES_PATH': os.path.expanduser('~/.cache/pyodide'),
-            'DENO_DIR': os.path.expanduser('~/.cache/deno'),
-            'PYODIDE_CACHE_DIR': os.path.expanduser('~/.cache/pyodide'),
-        }
-
-        for key, default_value in cache_defaults.items():
-            if key not in os.environ:
-                os.environ[key] = default_value
-                logger.debug(f"Set default Pyodide env: {key}={default_value}")
-
+        for key in ["SANDBOX_BASE", "DENO_DIR"]:
+            logger.info("Sandbox env: %s -> %s", key, os.environ.get(key, "n/a"))
     except Exception as e:
         logger.warning(f"Could not setup Pyodide cache environment: {e}")
 
@@ -142,7 +116,7 @@ class PyodideSandboxTool(BaseTool):
     def _prepare_pyodide_input(self, code: str) -> str:
         """Prepare input for PyodideSandboxTool by injecting state and alita_client into the code block."""
         pyodide_predata = ""
-
+
         # Add alita_client if available
         if self.alita_client:
             try:
@@ -158,7 +132,7 @@ class PyodideSandboxTool(BaseTool):
                            f"auth_token='{self.alita_client.auth_token}')\n")
             except FileNotFoundError:
                 logger.error(f"sandbox_client.py not found. Ensure the file exists.")
-
+
         return f"#elitea simplified client\n{pyodide_predata}{code}"
 
     def _initialize_sandbox(self) -> None:
@@ -175,9 +149,19 @@ class PyodideSandboxTool(BaseTool):
 
         from langchain_sandbox import PyodideSandbox
 
+        # Air-gapped settings
+        sandbox_base = os.environ.get("SANDBOX_BASE", os.path.expanduser('~/.cache/pyodide'))
+        sandbox_tmp = os.path.join(sandbox_base, "tmp")
+        deno_cache = os.environ.get("DENO_DIR", os.path.expanduser('~/.cache/deno'))
+
         # Configure sandbox with performance optimizations
         self._sandbox = PyodideSandbox(
             stateful=self.stateful,
+            #
+            allow_env=["SANDBOX_BASE"],
+            allow_read=[sandbox_base, sandbox_tmp, deno_cache],
+            allow_write=[sandbox_tmp, deno_cache],
+            #
             allow_net=self.allow_net,
             # Use auto node_modules_dir for better caching
             node_modules_dir="auto"
```
alita_sdk/tools/__init__.py
CHANGED
```diff
@@ -90,64 +90,74 @@ available_count = len(AVAILABLE_TOOLS)
 total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
 logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")
 
+
 def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
     tools = []
+
     for tool in tools_list:
-
-
-
-
-        if not tool.get('settings'):
+        settings = tool.get('settings')
+
+        # Skip tools without settings early
+        if not settings:
             logger.warning(f"Tool '{tool.get('type', '')}' has no settings, skipping...")
             continue
-
-        tool
-
+
+        # Validate tool names once
+        selected_tools = settings.get('selected_tools', [])
+        invalid_tools = [name for name in selected_tools if isinstance(name, str) and name.startswith('_')]
+        if invalid_tools:
+            raise ValueError(f"Tool names {invalid_tools} from toolkit '{tool.get('type', '')}' cannot start with '_'")
+
+        # Cache tool type and add common settings
         tool_type = tool['type']
+        settings['alita'] = alita
+        settings['llm'] = llm
+        settings['store'] = store
 
-        #
+        # Set pgvector collection schema if present
+        if settings.get('pgvector_configuration'):
+            settings['pgvector_configuration']['collection_schema'] = str(tool['id'])
+
+        # Handle ADO special cases
         if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
             tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
+            continue
 
-        #
-
+        # Handle ADO repos aliases
+        if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
             try:
-
-                tools.extend(get_tools_func(tool))
-
+                tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
             except Exception as e:
-                logger.error(f"Error getting
-
+                logger.error(f"Error getting ADO repos tools: {e}")
+            continue
 
-        # Handle
-
+        # Handle standard tools
+        if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
             try:
-
-                tools.extend(get_tools_func(tool))
+                tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
             except Exception as e:
-                logger.error(f"Error getting
+                logger.error(f"Error getting tools for {tool_type}: {e}")
+                raise ToolException(f"Error getting tools for {tool_type}: {e}")
+            continue
 
         # Handle custom modules
-
+        if settings.get("module"):
             try:
-                settings = tool.get("settings", {})
                 mod = import_module(settings.pop("module"))
                 tkitclass = getattr(mod, settings.pop("class"))
-
-                get_toolkit_params = tool["settings"].copy()
+                get_toolkit_params = settings.copy()
                 get_toolkit_params["name"] = tool.get("name")
-                #
                 toolkit = tkitclass.get_toolkit(**get_toolkit_params)
                 tools.extend(toolkit.get_tools())
             except Exception as e:
                 logger.error(f"Error in getting custom toolkit: {e}")
+            continue
 
+        # Tool not available
+        if tool_type in FAILED_IMPORTS:
+            logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
         else:
-
-            if tool_type in FAILED_IMPORTS:
-                logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
-            else:
-                logger.warning(f"Unknown tool type: {tool_type}")
+            logger.warning(f"Unknown tool type: {tool_type}")
 
     return tools
 
```
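After the refactor each dispatch branch ends in `continue`, the shared settings (`alita`, `llm`, `store`) are injected once per entry, and leading-underscore tool names are rejected up front. A hedged sketch of the `tools_list` shape the loop expects, inferred from the keys it reads (`id`, `type`, `name`, `settings`, `selected_tools`, `pgvector_configuration`); all values are illustrative:

```python
# Hypothetical toolkit entry, shaped after the keys get_tools() reads.
tools_list = [
    {
        "id": 42,                   # stringified and used as the pgvector collection schema
        "type": "jira",             # must match a key in AVAILABLE_TOOLS
        "name": "my_jira_toolkit",
        "settings": {
            "selected_tools": ["search_issues"],  # names may not start with '_'
            "pgvector_configuration": {
                "connection_string": "postgresql://...",  # placeholder
            },
        },
    },
]

# get_tools() injects alita/llm/store into each entry's settings, then dispatches
# on 'type'; unknown or failed-import types log a warning instead of failing the batch.
# tools = get_tools(tools_list, alita=alita_client, llm=llm, store=None)
```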
alita_sdk/tools/base_indexer_toolkit.py
CHANGED

```diff
@@ -110,7 +110,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('
+        collection_name = kwargs.get('collection_schema')
 
         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -160,6 +160,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         if clean_index:
             self._clean_index(index_name)
         #
+        self.index_meta_init(index_name, kwargs)
+        #
         self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
         self._log_tool_event(f"Loading the documents to index...{kwargs}")
         documents = self._base_loader(**kwargs)
@@ -179,7 +181,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
                     else "no new documents to index"}
         except Exception as e:
-            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value,
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
             raise e
 
 
@@ -454,6 +456,29 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             reranking_config=reranking_config,
             extended_search=extended_search
         )
+
+    def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "history": "[]",
+                "task_id": None,
+            }
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
 
     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
```
alita_sdk/tools/code_indexer_toolkit.py
CHANGED

```diff
@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator
 
@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def key_fn(self, document: Document):
-        return document.metadata.get(
+        return document.metadata.get("filename")
 
     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )
 
     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from
+        yield from documents
 
     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
             self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
 
-            return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())
 
     def __handle_get_files(self, path: str, branch: str):
         """
```
alita_sdk/tools/confluence/loader.py
CHANGED

```diff
@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger
 
 import requests
+from langchain_core.documents import Document
 
 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)
 
+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
```
alita_sdk/tools/gitlab/api_wrapper.py
CHANGED

```diff
@@ -115,9 +115,8 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
         """Remove trailing slash from URL if present."""
         return url.rstrip('/') if url else url
 
-    @model_validator(mode='
-
-    def validate_toolkit(cls, values: Dict) -> Dict:
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +124,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=
-            private_token=
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )
 
         g.auth()
-
-
-        return
+        self._git = g
+        self._active_branch = self.branch
+        return self
 
     @property
     def repo_instance(self):
```
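Moving from class-level validation over a raw `values` dict to `@model_validator(mode='after')` means the validator receives the fully constructed, typed instance, can assign private attributes such as `_git` directly, and returns `self`. A minimal Pydantic v2 sketch of the pattern; the config class and the stand-in client string are illustrative, not the SDK's actual wrapper:

```python
from pydantic import BaseModel, PrivateAttr, SecretStr, model_validator


class GitLabConfig(BaseModel):
    url: str
    private_token: SecretStr
    branch: str = "main"
    _client: object = PrivateAttr(default=None)  # populated during validation


    @model_validator(mode="after")
    def connect(self):
        # mode='after' runs on the validated instance, so fields are already
        # typed and private attributes can be set before returning self.
        self.url = self.url.rstrip("/")
        self._client = f"connected:{self.url}"  # stand-in for gitlab.Gitlab(...) + auth()
        return self


cfg = GitLabConfig(url="https://gitlab.example.com/", private_token="token")
assert cfg.url == "https://gitlab.example.com"
```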
|